540 files changed, 13954 insertions, 5261 deletions
diff --git a/.gitattributes b/.gitattributes
index 8d2fb108ba..ccc576c2e1 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -9,6 +9,8 @@ thirdparty/* linguist-vendored
 *.bat eol=crlf
 # And some test files where the EOL matters
 *.test.txt -text
+# And also the templates for Visual Studio files, which VS will always force CRLF on
+/misc/msvs/*.template eol=crlf
 
 # The above only works properly for Git 2.10+, so for older versions
 # we need to manually list the binary files we don't want modified.
diff --git a/.github/actions/godot-deps/action.yml b/.github/actions/godot-deps/action.yml
index 881eb7d1f6..cac72c436e 100644
--- a/.github/actions/godot-deps/action.yml
+++ b/.github/actions/godot-deps/action.yml
@@ -23,5 +23,5 @@ runs:
       shell: bash
       run: |
         python -c "import sys; print(sys.version)"
-        python -m pip install scons==4.4.0
+        python -m pip install scons==4.7.0
         scons --version
diff --git a/core/config/project_settings.cpp b/core/config/project_settings.cpp
index 1508469e27..75eea2ef50 100644
--- a/core/config/project_settings.cpp
+++ b/core/config/project_settings.cpp
@@ -1330,8 +1330,8 @@ void ProjectSettings::load_scene_groups_cache() {
 		for (const String &E : scene_paths) {
 			Array scene_groups = cf->get_value(E, "groups", Array());
 			HashSet<StringName> cache;
-			for (int i = 0; i < scene_groups.size(); ++i) {
-				cache.insert(scene_groups[i]);
+			for (const Variant &scene_group : scene_groups) {
+				cache.insert(scene_group);
 			}
 			add_scene_groups_cache(E, cache);
 		}
@@ -1438,6 +1438,7 @@ ProjectSettings::ProjectSettings() {
 	GLOBAL_DEF("application/run/disable_stdout", false);
 	GLOBAL_DEF("application/run/disable_stderr", false);
 	GLOBAL_DEF("application/run/print_header", true);
+	GLOBAL_DEF("application/run/enable_alt_space_menu", false);
 	GLOBAL_DEF_RST("application/config/use_hidden_project_data_directory", true);
 	GLOBAL_DEF("application/config/use_custom_user_dir", false);
 	GLOBAL_DEF("application/config/custom_user_dir_name", "");
diff --git a/core/extension/extension_api_dump.cpp b/core/extension/extension_api_dump.cpp
index 543dabfb16..69be2d2a8f 100644
--- a/core/extension/extension_api_dump.cpp
+++ b/core/extension/extension_api_dump.cpp
@@ -1313,9 +1313,7 @@ static bool compare_value(const String &p_path, const String &p_field, const Var
 	} else if (p_old_value.get_type() == Variant::DICTIONARY && p_new_value.get_type() == Variant::DICTIONARY) {
 		Dictionary old_dict = p_old_value;
 		Dictionary new_dict = p_new_value;
-		Array old_keys = old_dict.keys();
-		for (int i = 0; i < old_keys.size(); i++) {
-			Variant key = old_keys[i];
+		for (const Variant &key : old_dict.keys()) {
 			if (!new_dict.has(key)) {
 				failed = true;
 				print_error(vformat("Validate extension JSON: Error: Field '%s': %s was removed.", p_path, key));
@@ -1328,9 +1326,7 @@ static bool compare_value(const String &p_path, const String &p_field, const Var
 				failed = true;
 			}
 		}
-		Array new_keys = old_dict.keys();
-		for (int i = 0; i < new_keys.size(); i++) {
-			Variant key = new_keys[i];
+		for (const Variant &key : old_dict.keys()) {
 			if (!old_dict.has(key)) {
 				failed = true;
 				print_error(vformat("Validate extension JSON: Error: Field '%s': %s was added with value %s.", p_path, key, new_dict[key]));
@@ -1356,8 +1352,8 @@ static bool compare_dict_array(const Dictionary &p_old_api, const Dictionary &p_
 	Array new_api = p_new_api[p_base_array];
 	HashMap<String, Dictionary> new_api_assoc;
 
-	for (int i = 0; i < new_api.size(); i++) {
-		Dictionary elem = new_api[i];
+	for (const Variant &var : new_api) {
+		Dictionary elem = var;
 		ERR_FAIL_COND_V_MSG(!elem.has(p_name_field), false, vformat("Validate extension JSON: Element of base_array '%s' is missing field '%s'. This is a bug.", base_array, p_name_field));
 		String name = elem[p_name_field];
 		if (p_compare_operators && elem.has("right_type")) {
@@ -1367,8 +1363,8 @@ static bool compare_dict_array(const Dictionary &p_old_api, const Dictionary &p_
 	}
 
 	Array old_api = p_old_api[p_base_array];
-	for (int i = 0; i < old_api.size(); i++) {
-		Dictionary old_elem = old_api[i];
+	for (const Variant &var : old_api) {
+		Dictionary old_elem = var;
 		if (!old_elem.has(p_name_field)) {
 			failed = true;
 			print_error(vformat("Validate extension JSON: JSON file: element of base array '%s' is missing the field: '%s'.", base_array, p_name_field));
@@ -1508,16 +1504,16 @@ static bool compare_sub_dict_array(HashSet<String> &r_removed_classes_registered
 	Array new_api = p_new_api[p_outer];
 	HashMap<String, Dictionary> new_api_assoc;
 
-	for (int i = 0; i < new_api.size(); i++) {
-		Dictionary elem = new_api[i];
+	for (const Variant &var : new_api) {
+		Dictionary elem = var;
 		ERR_FAIL_COND_V_MSG(!elem.has(p_outer_name), false, vformat("Validate extension JSON: Element of base_array '%s' is missing field '%s'. This is a bug.", p_outer, p_outer_name));
 		new_api_assoc.insert(elem[p_outer_name], elem);
 	}
 
 	Array old_api = p_old_api[p_outer];
 
-	for (int i = 0; i < old_api.size(); i++) {
-		Dictionary old_elem = old_api[i];
+	for (const Variant &var : old_api) {
+		Dictionary old_elem = var;
 		if (!old_elem.has(p_outer_name)) {
 			failed = true;
 			print_error(vformat("Validate extension JSON: JSON file: element of base array '%s' is missing the field: '%s'.", p_outer, p_outer_name));
diff --git a/core/extension/gdextension.cpp b/core/extension/gdextension.cpp
index 0ed4c3380c..546c40e9cb 100644
--- a/core/extension/gdextension.cpp
+++ b/core/extension/gdextension.cpp
@@ -729,53 +729,18 @@ GDExtensionInterfaceFunctionPtr GDExtension::get_interface_function(const String
 
 Error GDExtension::open_library(const String &p_path, const String &p_entry_symbol) {
 	String abs_path = ProjectSettings::get_singleton()->globalize_path(p_path);
-#if defined(WINDOWS_ENABLED) && defined(TOOLS_ENABLED)
-	// If running on the editor on Windows, we copy the library and open the copy.
-	// This is so the original file isn't locked and can be updated by a compiler.
-	bool library_copied = false;
-	if (Engine::get_singleton()->is_editor_hint()) {
-		if (!FileAccess::exists(abs_path)) {
-			ERR_PRINT("GDExtension library not found: " + abs_path);
-			return ERR_FILE_NOT_FOUND;
-		}
-
-		// Copy the file to the same directory as the original with a prefix in the name.
-		// This is so relative path to dependencies are satisfied.
-		String copy_path = abs_path.get_base_dir().path_join("~" + abs_path.get_file());
+	String actual_lib_path;
+	Error err = OS::get_singleton()->open_dynamic_library(abs_path, library, true, &actual_lib_path, Engine::get_singleton()->is_editor_hint());
 
-		// If there's a left-over copy (possibly from a crash) then delete it first.
-		if (FileAccess::exists(copy_path)) {
-			DirAccess::remove_absolute(copy_path);
-		}
-
-		Error copy_err = DirAccess::copy_absolute(abs_path, copy_path);
-		if (copy_err) {
-			ERR_PRINT("Error copying GDExtension library: " + abs_path);
-			return ERR_CANT_CREATE;
-		}
-		FileAccess::set_hidden_attribute(copy_path, true);
-		library_copied = true;
-
-		// Save the copied path so it can be deleted later.
-		temp_lib_path = copy_path;
-
-		// Use the copy to open the library.
-		abs_path = copy_path;
+	if (actual_lib_path.get_file() != abs_path.get_file()) {
+		// If temporary files are generated, let's change the library path to point at the original,
+		// because that's what we want to check to see if it's changed.
+		library_path = actual_lib_path.get_base_dir().path_join(p_path.get_file());
 	}
-#endif
 
-	Error err = OS::get_singleton()->open_dynamic_library(abs_path, library, true, &library_path);
 	ERR_FAIL_COND_V_MSG(err == ERR_FILE_NOT_FOUND, err, "GDExtension dynamic library not found: " + abs_path);
 	ERR_FAIL_COND_V_MSG(err != OK, err, "Can't open GDExtension dynamic library: " + abs_path);
 
-#if defined(WINDOWS_ENABLED) && defined(TOOLS_ENABLED)
-	// If we copied the file, let's change the library path to point at the original,
-	// because that's what we want to check to see if it's changed.
-	if (library_copied) {
-		library_path = library_path.get_base_dir() + "\\" + p_path.get_file();
-	}
-#endif
-
 	void *entry_funcptr = nullptr;
 
 	err = OS::get_singleton()->get_dynamic_library_symbol_handle(library, p_entry_symbol, entry_funcptr, false);
@@ -803,13 +768,6 @@ void GDExtension::close_library() {
 	ERR_FAIL_NULL(library);
 	OS::get_singleton()->close_dynamic_library(library);
 
-#if defined(TOOLS_ENABLED) && defined(WINDOWS_ENABLED)
-	// Delete temporary copy of library if it exists.
-	if (!temp_lib_path.is_empty() && Engine::get_singleton()->is_editor_hint()) {
-		DirAccess::remove_absolute(temp_lib_path);
-	}
-#endif
-
 	library = nullptr;
 	class_icon_paths.clear();
 
@@ -1014,13 +972,6 @@ Error GDExtensionResourceLoader::load_gdextension_resource(const String &p_path,
 
 	err = p_extension->open_library(is_static_library ? String() : library_path, entry_symbol);
 	if (err != OK) {
-#if defined(WINDOWS_ENABLED) && defined(TOOLS_ENABLED)
-		// If the DLL fails to load, make sure that temporary DLL copies are cleaned up.
-		if (Engine::get_singleton()->is_editor_hint()) {
-			DirAccess::remove_absolute(p_extension->get_temp_library_path());
-		}
-#endif
-
 		// Unreference the extension so that this loading can be considered a failure.
 		p_extension.unref();
 
diff --git a/core/extension/gdextension.h b/core/extension/gdextension.h
index 2d0cb6a5ba..1f48edecf7 100644
--- a/core/extension/gdextension.h
+++ b/core/extension/gdextension.h
@@ -47,9 +47,6 @@ class GDExtension : public Resource {
 
 	void *library = nullptr; // pointer if valid,
 	String library_path;
-#if defined(WINDOWS_ENABLED) && defined(TOOLS_ENABLED)
-	String temp_lib_path;
-#endif
 	bool reloadable = false;
 
 	struct Extension {
@@ -130,10 +127,6 @@ public:
 	Error open_library(const String &p_path, const String &p_entry_symbol);
 	void close_library();
 
-#if defined(WINDOWS_ENABLED) && defined(TOOLS_ENABLED)
-	String get_temp_library_path() const { return temp_lib_path; }
-#endif
-
 	enum InitializationLevel {
 		INITIALIZATION_LEVEL_CORE = GDEXTENSION_INITIALIZATION_CORE,
 		INITIALIZATION_LEVEL_SERVERS = GDEXTENSION_INITIALIZATION_SERVERS,
diff --git a/core/input/gamecontrollerdb.txt b/core/input/gamecontrollerdb.txt
index 420de7f9b7..a17e15a62c 100644
--- a/core/input/gamecontrollerdb.txt
+++ b/core/input/gamecontrollerdb.txt
@@ -23,7 +23,7 @@
 03000000c82d00001151000000000000,8BitDo Lite SE,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b4,y:b3,platform:Windows,
 03000000c82d00000150000000000000,8BitDo M30,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftx:a0,lefty:a1,rightshoulder:b6,righttrigger:b7,rightx:a3,righty:a5,start:b11,x:b4,y:b3,platform:Windows,
 03000000c82d00000151000000000000,8BitDo M30,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftx:a0,lefty:a2,rightshoulder:b6,righttrigger:b7,rightx:a3,righty:a5,start:b11,x:b3,y:b4,platform:Windows,
-03000000c82d00000650000000000000,8BitDo M30,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b8,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b6,righttrigger:b7,start:b11,x:b3,y:b4,platform:Windows,
+03000000c82d00000650000000000000,8BitDo M30,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b8,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b6,righttrigger:b7,rightx:a3,righty:a4,start:b11,x:b3,y:b4,platform:Windows,
 03000000c82d00005106000000000000,8BitDo M30,a:b0,b:b1,back:b10,dpdown:+a2,dpleft:-a0,dpright:+a0,dpup:-a2,guide:b2,leftshoulder:b8,lefttrigger:b9,rightshoulder:b6,righttrigger:b7,start:b11,x:b3,y:b4,platform:Windows,
 03000000c82d00002090000000000000,8BitDo Micro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b4,y:b3,platform:Windows,
 03000000c82d00000310000000000000,8BitDo N30,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,start:b11,x:b3,y:b4,platform:Windows,
@@ -104,6 +104,7 @@
 030000007c1800000006000000000000,Alienware Dual Compatible PlayStation Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a3,righty:a2,start:b9,x:b0,y:b3,platform:Windows,
 03000000491900001904000000000000,Amazon Luna Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,misc1:b9,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b7,x:b2,y:b3,platform:Windows,
 03000000710100001904000000000000,Amazon Luna Controller,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b11,leftshoulder:b5,leftstick:b8,leftx:a0,lefty:a1,misc1:b9,rightshoulder:b4,rightstick:b7,rightx:a3,righty:a4,start:b6,x:b3,y:b2,platform:Windows,
+0300000008100000e501000000000000,Anbernic Game Pad,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a4,start:b11,x:b3,y:b4,platform:Windows,
 03000000830500000160000000000000,Arcade,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b7,leftx:a0,lefty:a1,rightshoulder:b2,righttrigger:b3,x:b4,y:b4,platform:Windows,
 03000000120c0000100e000000000000,Armor 3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
 03000000490b00004406000000000000,ASCII Seamic Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b7,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b2,start:b9,x:b3,y:b4,platform:Windows,
@@ -114,6 +115,10 @@
 03000000050b00000679000000000000,ASUS ROG Kunai 3,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,misc1:b15,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b3,y:b4,platform:Windows,
 03000000503200000110000000000000,Atari VCS Classic Controller,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,start:b3,platform:Windows,
 03000000503200000210000000000000,Atari VCS Modern Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,rightx:a3,righty:a4,start:b9,x:b2,y:b3,platform:Windows,
+030000008a3500000102000000000000,Backbone One,a:b4,b:b5,back:b14,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b16,leftshoulder:b10,leftstick:b17,lefttrigger:b12,leftx:a0,lefty:a1,rightshoulder:b11,rightstick:b18,righttrigger:b13,rightx:a3,righty:a4,start:b15,x:b7,y:b8,platform:Windows,
+030000008a3500000201000000000000,Backbone One,a:b4,b:b5,back:b14,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b16,leftshoulder:b10,leftstick:b17,lefttrigger:b12,leftx:a0,lefty:a1,rightshoulder:b11,rightstick:b18,righttrigger:b13,rightx:a3,righty:a4,start:b15,x:b7,y:b8,platform:Windows,
+030000008a3500000302000000000000,Backbone One,a:b4,b:b5,back:b14,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b16,leftshoulder:b10,leftstick:b17,lefttrigger:b12,leftx:a0,lefty:a1,rightshoulder:b11,rightstick:b18,righttrigger:b13,rightx:a3,righty:a4,start:b15,x:b7,y:b8,platform:Windows,
+030000008a3500000402000000000000,Backbone One,a:b4,b:b5,back:b14,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b16,leftshoulder:b10,leftstick:b17,lefttrigger:b12,leftx:a0,lefty:a1,rightshoulder:b11,rightstick:b18,righttrigger:b13,rightx:a3,righty:a4,start:b15,x:b7,y:b8,platform:Windows,
 03000000e4150000103f000000000000,Batarang,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b8,lefttrigger:b10,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:b11,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Windows,
 03000000d6200000e557000000000000,Batarang PlayStation Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
 03000000c01100001352000000000000,Battalife Joystick,a:b6,b:b7,back:b2,leftshoulder:b0,leftx:a0,lefty:a1,rightshoulder:b1,start:b3,x:b4,y:b5,platform:Windows,
@@ -167,6 +172,7 @@
 03000000317300000100000000000000,DualShock 3,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a5,start:b11,x:b3,y:b4,platform:Windows,
 030000006f0e00003001000000000000,EA Sports PS3 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
 03000000fc0400000250000000000000,Easy Grip,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b7,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b2,start:b9,x:b3,y:b4,platform:Windows,
+03000000bc2000000091000000000000,EasySMX Controller,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Windows,
 030000006e0500000a20000000000000,Elecom DUX60 MMO,a:b2,b:b3,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b8,leftstick:b14,lefttrigger:b12,leftx:a0,lefty:a1,rightshoulder:b11,rightstick:b15,righttrigger:b13,rightx:a3,righty:a4,start:b20,x:b0,y:b1,platform:Windows,
 03000000b80500000410000000000000,Elecom Gamepad,a:b2,b:b3,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b1,platform:Windows,
 03000000b80500000610000000000000,Elecom Gamepad,a:b2,b:b3,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b1,platform:Windows,
@@ -254,6 +260,8 @@
 030000000d0f00005f00000000000000,Hori Fighting Commander 4 PS3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
 030000000d0f00005e00000000000000,Hori Fighting Commander 4 PS4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
 030000000d0f00008400000000000000,Hori Fighting Commander 5,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
+030000000d0f00006201000000000000,Hori Fighting Commander Octa,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,start:b9,x:b0,y:b3,platform:Windows,
+030000000d0f00006401000000000000,Hori Fighting Commander Octa,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b8,lefttrigger:+a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:-a2,start:b7,x:b2,y:b3,platform:Windows,
 030000000d0f00005100000000000000,Hori Fighting Commander PS3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
 030000000d0f00008600000000000000,Hori Fighting Commander Xbox 360,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b8,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Windows,
 030000000d0f0000ba00000000000000,Hori Fighting Commander Xbox 360,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b8,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Windows,
@@ -556,6 +564,7 @@
 030000009b2800002300000000000000,Raphnet 3DO Adapter,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b2,start:b3,platform:Windows,
 030000009b2800006900000000000000,Raphnet 3DO Adapter,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b2,start:b3,platform:Windows,
 030000009b2800000800000000000000,Raphnet Dreamcast Adapter,a:b2,b:b1,dpdown:b5,dpleft:b6,dpright:b7,dpup:b4,lefttrigger:a2,leftx:a0,righttrigger:a3,righty:a1,start:b3,x:b10,y:b9,platform:Windows,
+030000009b280000d000000000000000,Raphnet Dreamcast Adapter,a:b1,b:b0,dpdown:b13,dpleft:b14,dpright:b15,dpup:b12,lefttrigger:+a5,leftx:a0,lefty:a1,righttrigger:+a2,start:b3,x:b5,y:b4,platform:Windows,
 030000009b2800006200000000000000,Raphnet GameCube Adapter,a:b0,b:b7,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b2,righttrigger:b5,rightx:a3,righty:a4,start:b3,x:b1,y:b8,platform:Windows,
 030000009b2800003200000000000000,Raphnet GC and N64 Adapter,a:b0,b:b7,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,lefttrigger:+a5,leftx:a0,lefty:a1,rightshoulder:b2,righttrigger:+a2,rightx:a3,righty:a4,start:b3,x:b1,y:b8,platform:Windows,
 030000009b2800006000000000000000,Raphnet GC and N64 Adapter,a:b0,b:b7,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,lefttrigger:+a5,leftx:a0,lefty:a1,rightshoulder:b2,righttrigger:+a2,rightx:a3,righty:a4,start:b3,x:b1,y:b8,platform:Windows,
@@ -567,6 +576,9 @@
 030000009b2800004300000000000000,Raphnet Saturn,a:b0,b:b1,dpdown:b13,dpleft:b14,dpright:b15,dpup:b12,leftshoulder:b6,lefttrigger:b7,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b2,start:b8,x:b3,y:b4,platform:Windows,
 030000009b2800000500000000000000,Raphnet Saturn Adapter 2.0,a:b1,b:b2,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,lefttrigger:b4,rightshoulder:b7,righttrigger:b5,start:b9,x:b0,y:b3,platform:Windows,
 030000009b2800000300000000000000,Raphnet SNES Adapter,a:b0,b:b4,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,start:b3,x:b1,y:b5,platform:Windows,
+030000009b2800002600000000000000,Raphnet SNES Adapter,a:b1,b:b4,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b5,platform:Windows,
+030000009b2800002e00000000000000,Raphnet SNES Adapter,a:b1,b:b4,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b5,platform:Windows,
+030000009b2800002f00000000000000,Raphnet SNES Adapter,a:b1,b:b4,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b5,platform:Windows,
 030000009b2800005600000000000000,Raphnet SNES Adapter,a:b1,b:b4,back:b2,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b5,platform:Windows,
 030000009b2800005700000000000000,Raphnet SNES Adapter,a:b1,b:b4,back:b2,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b5,platform:Windows,
 030000009b2800001e00000000000000,Raphnet Vectrex Adapter,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftx:a1,lefty:a2,x:b2,y:b3,platform:Windows,
@@ -652,6 +664,7 @@
 030000003b07000004a1000000000000,SFX,a:b0,b:b2,back:b7,leftshoulder:b6,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b9,righttrigger:b5,start:b8,x:b1,y:b3,platform:Windows,
 03000000f82100001900000000000000,Shogun Bros Chameleon X1,a:b2,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b5,rightx:a2,righty:a3,start:b8,x:b3,y:b0,platform:Windows,
 03000000120c00001c1e000000000000,SnakeByte 4S PS4 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
+03000000140300000918000000000000,SNES Controller,a:b0,b:b1,back:b6,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b5,start:b7,x:b2,y:b3,platform:Windows,
 0300000081170000960a000000000000,SNES Controller,a:b4,b:b0,back:b2,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b3,x:b5,y:b1,platform:Windows,
 03000000811700009d0a000000000000,SNES Controller,a:b0,b:b4,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,start:b3,x:b1,y:b5,platform:Windows,
 030000008b2800000300000000000000,SNES Controller,a:b0,b:b4,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,start:b3,x:b1,y:b5,platform:Windows,
@@ -787,6 +800,7 @@
 03000000bc2000005060000000000000,Xiaomi XMGP01YM,+lefty:+a2,+righty:+a5,-lefty:-a1,-righty:-a4,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,start:b11,x:b3,y:b4,platform:Windows,
 xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b8,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Windows,
 030000007d0400000340000000000000,Xterminator Digital Gamepad,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:-a4,lefttrigger:+a4,leftx:a0,lefty:a1,paddle1:b7,paddle2:b6,rightshoulder:b5,rightstick:b9,righttrigger:b2,rightx:a3,righty:a5,start:b8,x:b3,y:b4,platform:Windows,
+030000002c3600000100000000000000,Yawman Arrow,+rightx:h0.2,+righty:h0.4,-rightx:h0.8,-righty:h0.1,a:b4,b:b5,back:b6,dpdown:b15,dpleft:b14,dpright:b16,dpup:b13,leftshoulder:b10,leftstick:b0,lefttrigger:-a4,leftx:a0,lefty:a1,paddle1:b11,paddle2:b12,rightshoulder:b8,rightstick:b9,righttrigger:+a4,start:b3,x:b1,y:b2,platform:Windows,
 03000000790000004f18000000000000,ZDT Android Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a3,righty:a4,start:b11,x:b3,y:b4,platform:Windows,
 03000000120c00000500000000000000,Zeroplus Adapter,a:b2,b:b1,back:b11,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b9,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b0,righttrigger:b5,rightx:a3,righty:a2,start:b8,x:b3,y:b0,platform:Windows,
 03000000120c0000101e000000000000,Zeroplus P4 Gamepad,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
@@ -804,7 +818,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000c82d00001151000000020000,8BitDo Lite SE,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000a30c00002400000006020000,8BitDo M30,a:b2,b:b1,dpdown:+a4,dpleft:-a3,dpright:+a3,dpup:-a4,guide:b9,leftshoulder:b6,lefttrigger:b5,rightshoulder:b4,righttrigger:b7,start:b8,x:b3,y:b0,platform:Mac OS X,
 03000000c82d00000151000000010000,8BitDo M30,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftx:a0,lefty:a1,rightshoulder:b6,righttrigger:b7,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
-03000000c82d00000650000001000000,8BitDo M30,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b8,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b6,righttrigger:b7,start:b11,x:b3,y:b4,platform:Mac OS X,
+03000000c82d00000650000001000000,8BitDo M30,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b8,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b6,righttrigger:b7,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000c82d00005106000000010000,8BitDo M30,a:b1,b:b0,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,guide:b2,leftshoulder:b6,lefttrigger:a5,rightshoulder:b7,righttrigger:a4,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000c82d00002090000000010000,8BitDo Micro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000c82d00000451000000010000,8BitDo N30,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftx:a0,lefty:a1,rightx:a2,righty:a3,start:b11,platform:Mac OS X,
@@ -813,7 +827,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000c82d00006928000000010000,8BitDo N64,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b9,rightx:a2,righty:a3,start:b11,platform:Mac OS X,
 03000000c82d00002590000000010000,8BitDo NEOGEO,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000c82d00002590000001000000,8BitDo NEOGEO,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
-03000000c82d00002690000000010000,8BitDo NEOGEOa:b0,+leftx:h0.2,+lefty:h0.4,-leftx:h0.8,-lefty:h0.1,b:b1,back:b10,leftshoulder:b6,rightshoulder:b7,start:b11,x:b3,y:b4,platform:Mac OS X,
+03000000c82d00002690000000010000,8BitDo NEOGEO,+leftx:h0.2,+lefty:h0.4,-leftx:h0.8,-lefty:h0.1,a:b0,b:b1,back:b10,leftshoulder:b6,rightshoulder:b7,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000003512000012ab000001000000,8BitDo NES30,a:b1,b:b0,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000c82d000012ab000001000000,8BitDo NES30,a:b0,b:b1,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000c82d00002028000000010000,8BitDo NES30,a:b0,b:b1,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b3,y:b4,platform:Mac OS X,
@@ -829,6 +843,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000c82d00000331000001000000,8BitDo Receiver,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000c82d00000431000001000000,8BitDo Receiver,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000c82d00002867000000010000,8BitDo S30,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b8,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b6,righttrigger:b7,rightx:a2,righty:a3,start:b10,x:b3,y:b4,platform:Mac OS X,
+03000000c82d00003028000000010000,8Bitdo SFC30 Gamepad,a:b1,b:b0,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000102800000900000000000000,8BitDo SFC30 Joystick,a:b1,b:b0,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000c82d00000351000000010000,8BitDo SN30,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000c82d00001290000001000000,8BitDo SN30,a:b1,b:b0,back:b10,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b11,x:b4,y:b3,platform:Mac OS X,
@@ -850,6 +865,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000c82d00003032000000010000,8BitDo Zero 2,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,rightx:a2,righty:a31,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000491900001904000001010000,Amazon Luna Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,misc1:b9,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b7,x:b2,y:b3,platform:Mac OS X,
 03000000710100001904000000010000,Amazon Luna Controller,a:b0,b:b1,back:b11,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b7,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b9,rightshoulder:b5,rightstick:b8,righttrigger:a4,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Mac OS X,
+0300000008100000e501000019040000,Anbernic Gamepad,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a4,start:b11,x:b4,y:b3,platform:Mac OS X,
 03000000a30c00002700000003030000,Astro City Mini,a:b2,b:b1,back:b8,dpdown:+a4,dpleft:-a3,dpright:+a3,dpup:-a4,rightshoulder:b4,righttrigger:b5,start:b9,x:b3,y:b0,platform:Mac OS X,
 03000000a30c00002800000003030000,Astro City Mini,a:b2,b:b1,back:b8,leftx:a3,lefty:a4,rightshoulder:b4,righttrigger:b5,start:b9,x:b3,y:b0,platform:Mac OS X,
 03000000050b00000045000031000000,ASUS Gamepad,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b7,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b8,righttrigger:a4,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Mac OS X,
@@ -857,6 +873,11 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000050b00000679000000010000,ASUS ROG Kunai 3,a:b0,b:b1,back:b12,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b14,leftshoulder:b6,leftstick:b15,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b23,rightshoulder:b7,rightstick:b16,righttrigger:a4,rightx:a2,righty:a3,start:b13,x:b3,y:b4,platform:Mac OS X,
 03000000503200000110000047010000,Atari VCS Classic Controller,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b3,start:b2,platform:Mac OS X,
 03000000503200000210000047010000,Atari VCS Modern Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b9,leftshoulder:b4,leftstick:b6,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a4,rightx:a2,righty:a3,start:b8,x:b2,y:b3,platform:Mac OS X,
+030000008a3500000102000000010000,Backbone One,a:b0,b:b1,back:b16,dpdown:b11,dpleft:b13,dpright:b12,dpup:b10,guide:b17,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3~,start:b15,x:b2,y:b3,platform:Mac OS X,
+030000008a3500000201000000010000,Backbone One,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
+030000008a3500000202000000010000,Backbone One,a:b0,b:b1,back:b16,dpdown:b11,dpleft:b13,dpright:b12,dpup:b10,guide:b17,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3~,start:b15,x:b2,y:b3,platform:Mac OS X,
+030000008a3500000402000000010000,Backbone One,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
+030000008a3500000302000000010000,Backbone One PlayStationÆ Edition,a:b0,b:b1,back:b16,dpdown:b11,dpleft:b13,dpright:b12,dpup:b10,guide:b17,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1~,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3~,start:b15,x:b2,y:b3,platform:Mac OS X,
 03000000c62400001a89000000010000,BDA MOGA XP5-X Plus,a:b0,b:b1,back:b12,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b14,leftshoulder:b6,leftstick:b15,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b16,righttrigger:a4,rightx:a2,righty:a3,start:b13,x:b3,y:b4,platform:Mac OS X,
 03000000c62400001b89000000010000,BDA MOGA XP5-X Plus,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000d62000002a79000000010000,BDA PS4 Fightpad,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
@@ -901,6 +922,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000000d0f00006600000000010000,Horipad 4 PS4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000000d0f00006600000000000000,Horipad FPS Plus 4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 030000000d0f0000ee00000000010000,Horipad Mini 4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
+030000000d0f0000c100000072050000,Horipad Nintendo Switch Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000242e0000ff0b000000010000,Hyperkin N64 Adapter,a:b1,b:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a2,righty:a3,start:b9,platform:Mac OS X,
 03000000790000004e95000000010000,Hyperkin N64 Controller Adapter,a:b1,b:b2,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,leftshoulder:b7,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a5,righty:a2,start:b9,platform:Mac OS X,
 03000000830500006020000000000000,iBuffalo Super Famicom Controller,a:b1,b:b0,back:b6,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b5,start:b7,x:b3,y:b2,platform:Mac OS X,
@@ -947,7 +969,8 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000001008000001e5000006010000,NEXT SNES Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b5,righttrigger:b6,start:b9,x:b3,y:b0,platform:Mac OS X,
 030000007e0500000920000000000000,Nintendo Switch Pro Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Mac OS X,
 030000007e0500000920000001000000,Nintendo Switch Pro Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Mac OS X,
-050000007e05000009200000ff070000,Nintendo Switch Pro Controller,a:b1,b:b0,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b9,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b10,x:b3,y:b2,platform:Mac OS X,
+030000007e0500000920000010020000,Nintendo Switch Pro Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b9,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b10,x:b2,y:b3,platform:Mac OS X,
+050000007e05000009200000ff070000,Nintendo Switch Pro Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b9,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b10,x:b2,y:b3,platform:Mac OS X,
 030000007e0500001920000001000000,NSO N64 Controller,+rightx:b8,+righty:b7,-rightx:b3,-righty:b2,a:b1,b:b0,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,misc1:b13,rightshoulder:b5,righttrigger:b10,start:b9,platform:Mac OS X,
 030000007e0500001720000001000000,NSO SNES Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b15,start:b9,x:b2,y:b3,platform:Mac OS X,
 03000000550900001472000025050000,NVIDIA Controller,a:b0,b:b1,back:b17,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b4,leftstick:b7,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b8,righttrigger:a4,rightx:a2,righty:a5,start:b6,x:b2,y:b3,platform:Mac OS X,
@@ -976,6 +999,8 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000005e040000e002000001000000,PXN P30 Pro Mobile,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b8,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Mac OS X,
 03000000222c00000225000000010000,Qanba Dragon Arcade Joystick (PS3),a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000222c00000020000000010000,Qanba Drone Arcade Stick,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
+030000009b2800005600000020020000,Raphnet SNES Adapter,a:b1,b:b4,back:b2,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b5,platform:Mac OS X,
+030000009b2800008000000022020000,Raphnet Wii Classic Adapter,a:b1,b:b4,back:b2,dpdown:b13,dpleft:b14,dpright:b15,dpup:b12,guide:b10,leftshoulder:b6,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b9,rightx:a3,righty:a4,start:b3,x:b0,y:b5,platform:Mac OS X,
 030000008916000000fd000000000000,Razer Onza TE,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 03000000321500000204000000010000,Razer Panthera PS3 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000321500000104000000010000,Razer Panthera PS4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
@@ -986,7 +1011,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000003215000000090000163a0000,Razer Serval,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a4,rightx:a2,righty:a3,start:b7,x:b2,y:b3,platform:Mac OS X,
 0300000032150000030a000000000000,Razer Wildcat,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 03000000632500008005000000010000,Redgear,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Mac OS X,
-030000000d0f0000c100000072050000,Retro Bit Sega Genesis 6B Controller,a:b2,b:b1,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,lefttrigger:b8,rightshoulder:b6,righttrigger:b7,start:b9,x:b3,y:b0,platform:Mac OS X,
 03000000921200004547000000020000,Retro Bit Sega Genesis Controller Adapter,a:b0,b:b2,dpdown:+a2,dpleft:-a0,dpright:+a0,dpup:-a2,lefttrigger:b14,rightshoulder:b10,righttrigger:b4,start:b12,x:b6,y:b8,platform:Mac OS X,
 03000000790000001100000000000000,Retro Controller,a:b1,b:b2,back:b8,dpdown:+a4,dpleft:-a3,dpright:+a3,dpup:-a4,leftshoulder:b6,lefttrigger:b7,rightshoulder:b4,righttrigger:b5,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000790000001100000005010000,Retro Controller,a:b1,b:b2,back:b8,dpdown:+a4,dpleft:-a3,dpright:+a3,dpup:-a4,leftshoulder:b6,lefttrigger:b7,rightshoulder:b5,righttrigger:b4,start:b9,x:b0,y:b3,platform:Mac OS X,
@@ -1063,12 +1087,13 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000005e040000130b000009050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b15,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000130b000013050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 030000005e040000130b000015050000,Xbox Series Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
+030000005e040000130b000017050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
+030000005e040000220b000017050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000172700004431000029010000,XiaoMi Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b15,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a6,rightx:a2,righty:a5,start:b11,x:b3,y:b4,platform:Mac OS X,
 03000000120c0000100e000000010000,Zeroplus P4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000120c0000101e000000010000,Zeroplus P4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 
 # Linux
-030000005e0400008e02000020010000,8BitDo Adapter,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 03000000c82d00000031000011010000,8BitDo Adapter,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 03000000c82d00000951000000010000,8BitDo Dogbone,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftx:a0,lefty:a1,rightx:a2,righty:a3,start:b11,platform:Linux,
 03000000021000000090000011010000,8BitDo FC30 Pro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
@@ -1079,7 +1104,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000c82d00001151000011010000,8BitDo Lite SE,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
 05000000c82d00001151000000010000,8BitDo Lite SE,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
 03000000c82d00000151000000010000,8BitDo M30,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftx:a0,lefty:a1,rightshoulder:b6,righttrigger:b7,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
-03000000c82d00000650000011010000,8BitDo M30,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b9,start:b11,x:b3,y:b4,platform:Linux,
+03000000c82d00000650000011010000,8BitDo M30,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b2,leftshoulder:b6,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 05000000c82d00005106000000010000,8BitDo M30,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b8,lefttrigger:b9,leftx:a0,lefty:a1,rightshoulder:b6,righttrigger:b7,start:b11,x:b3,y:b4,platform:Linux,
 03000000c82d00002090000011010000,8BitDo Micro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
 05000000c82d00002090000000010000,8BitDo Micro,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b4,y:b3,platform:Linux,
@@ -1151,6 +1176,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 05000000491900000204000021000000,Amazon Fire Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b17,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b12,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 03000000491900001904000011010000,Amazon Luna Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,misc1:b9,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b7,x:b2,y:b3,platform:Linux,
 05000000710100001904000000010000,Amazon Luna Controller,a:b0,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b7,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b11,rightshoulder:b5,rightstick:b8,righttrigger:a4,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Linux,
+0300000008100000e501000001010000,Anbernic Gamepad,a:b1,b:b0,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a4,start:b11,x:b3,y:b4,platform:Linux,
 03000000790000003018000011010000,Arcade Fightstick F300,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Linux,
 03000000a30c00002700000011010000,Astro City Mini,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,rightshoulder:b4,righttrigger:b5,start:b9,x:b3,y:b0,platform:Linux,
 03000000a30c00002800000011010000,Astro City Mini,a:b2,b:b1,back:b8,leftx:a0,lefty:a1,rightshoulder:b4,righttrigger:b5,start:b9,x:b3,y:b0,platform:Linux,
@@ -1158,17 +1184,21 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 05000000050b00000045000040000000,ASUS Gamepad,a:b0,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b6,leftshoulder:b4,leftstick:b7,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b8,righttrigger:a4,rightx:a2,righty:a3,start:b10,x:b2,y:b3,platform:Linux,
 03000000050b00000579000011010000,ASUS ROG Kunai 3,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b36,paddle1:b52,paddle2:b53,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 05000000050b00000679000000010000,ASUS ROG Kunai 3,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b21,paddle1:b22,paddle2:b23,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
-03000000503200000110000000000000,Atari Classic Controller,a:b0,b:b1,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b4,start:b3,platform:Linux,
-03000000503200000110000011010000,Atari Classic Controller,a:b0,b:b1,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b4,start:b3,platform:Linux,
-05000000503200000110000000000000,Atari Classic Controller,a:b0,b:b1,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b4,start:b3,platform:Linux,
-05000000503200000110000044010000,Atari Classic Controller,a:b0,b:b1,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b4,start:b3,platform:Linux,
-05000000503200000110000046010000,Atari Classic Controller,a:b0,b:b1,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b4,start:b3,platform:Linux,
-03000000503200000210000000000000,Atari Controller,a:b0,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a4,rightx:a2,righty:a3,start:b8,x:b2,y:b3,platform:Linux,
-03000000503200000210000011010000,Atari Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b3,y:b2,platform:Linux,
-05000000503200000210000000000000,Atari Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b3,y:b2,platform:Linux,
-05000000503200000210000045010000,Atari Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b3,y:b2,platform:Linux,
-05000000503200000210000046010000,Atari Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b3,y:b2,platform:Linux,
+03000000503200000110000000000000,Atari VCS Classic Controller,a:b0,b:b1,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b4,start:b3,platform:Linux,
+03000000503200000110000011010000,Atari VCS Classic Controller,a:b0,b:b1,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b4,start:b3,platform:Linux,
+05000000503200000110000000000000,Atari VCS Classic Controller,a:b0,b:b1,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b4,start:b3,platform:Linux,
+05000000503200000110000044010000,Atari VCS Classic Controller,a:b0,b:b1,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b4,start:b3,platform:Linux,
+05000000503200000110000046010000,Atari VCS Classic Controller,a:b0,b:b1,back:b2,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b4,start:b3,platform:Linux,
+03000000503200000210000000000000,Atari VCS Modern Controller,a:b0,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a4,rightx:a2,righty:a3,start:b8,x:b2,y:b3,platform:Linux,
+03000000503200000210000011010000,Atari VCS Modern Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b3,y:b2,platform:Linux,
+05000000503200000210000000000000,Atari VCS Modern Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b3,y:b2,platform:Linux,
+05000000503200000210000045010000,Atari VCS Modern Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b3,y:b2,platform:Linux,
+05000000503200000210000046010000,Atari VCS Modern Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b3,y:b2,platform:Linux,
 05000000503200000210000047010000,Atari VCS Modern Controller,a:b0,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:+a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:-a4,rightx:a2,righty:a3,start:b8,x:b2,y:b3,platform:Linux,
+030000008a3500000201000011010000,Backbone One,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
+030000008a3500000202000011010000,Backbone One,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
+030000008a3500000302000011010000,Backbone One,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
+030000008a3500000402000011010000,Backbone One,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 03000000c62400001b89000011010000,BDA MOGA XP5X Plus,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 03000000d62000002a79000011010000,BDA PS4 Fightpad,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 03000000c21100000791000011010000,Be1 GC101 Controller 1.03,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Linux,
@@ -1275,7 +1305,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000fd0500000030000000010000,InterAct GoPad,a:b3,b:b4,leftshoulder:b6,lefttrigger:b7,leftx:a0,lefty:a1,rightshoulder:b2,righttrigger:b5,x:b0,y:b1,platform:Linux,
 03000000fd0500002a26000000010000,InterAct HammerHead FX,a:b3,b:b4,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b2,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b5,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b0,y:b1,platform:Linux,
 0500000049190000020400001b010000,Ipega PG 9069,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b161,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
-03000000632500007505000011010000,Ipega PG 9099,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Linux,
+03000000632500007505000011010000,Ipega PG 9099,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b0,platform:Linux,
 0500000049190000030400001b010000,Ipega PG9099,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:b8,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:b9,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 05000000491900000204000000000000,Ipega PG9118,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 03000000300f00001001000010010000,Jess Tech Dual Analog Rumble,a:b2,b:b3,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b5,leftx:a0,lefty:a1,rightshoulder:b6,rightstick:b11,righttrigger:b7,rightx:a3,righty:a2,start:b9,x:b0,y:b1,platform:Linux,
@@ -1375,7 +1405,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000790000004518000010010000,Nexilux GameCube Controller Adapter,a:b1,b:b0,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:a4,rightx:a5,righty:a2,start:b9,x:b2,y:b3,platform:Linux,
 030000001008000001e5000010010000,NEXT SNES Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b5,righttrigger:b6,start:b9,x:b3,y:b0,platform:Linux,
 060000007e0500003713000000000000,Nintendo 3DS,a:b0,b:b1,back:b8,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,leftshoulder:b4,leftx:a0,lefty:a1,rightshoulder:b5,rightx:a2,righty:a3,start:b9,x:b3,y:b2,platform:Linux,
-030000009b2800008000000020020000,Nintendo Classic Controller,a:b1,b:b4,back:b2,dpdown:b13,dpleft:b14,dpright:b15,dpup:b12,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b5,platform:Linux,
 030000007e0500003703000000016800,Nintendo GameCube Controller,a:b0,b:b2,dpdown:b6,dpleft:b4,dpright:b5,dpup:b7,lefttrigger:a4,leftx:a0,lefty:a1~,rightshoulder:b9,righttrigger:a5,rightx:a2,righty:a3~,start:b8,x:b1,y:b3,platform:Linux,
 03000000790000004618000010010000,Nintendo GameCube Controller Adapter,a:b1,b:b0,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,righttrigger:b5,rightx:a5~,righty:a2~,start:b9,x:b2,y:b3,platform:Linux,
 060000004e696e74656e646f20537700,Nintendo Switch Combined Joy-Cons,a:b0,b:b1,back:b9,dpdown:b15,dpleft:b16,dpright:b17,dpup:b14,guide:b11,leftshoulder:b5,leftstick:b12,lefttrigger:b7,leftx:a0,lefty:a1,misc1:b4,rightshoulder:b6,rightstick:b13,righttrigger:b8,rightx:a2,righty:a3,start:b10,x:b3,y:b2,platform:Linux,
@@ -1404,7 +1433,6 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 03000000451300000830000010010000,NYKO CORE,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 19000000010000000100000001010000,ODROID Go 2,a:b1,b:b0,dpdown:b7,dpleft:b8,dpright:b9,dpup:b6,guide:b10,leftshoulder:b4,leftstick:b12,lefttrigger:b11,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b13,righttrigger:b14,start:b15,x:b2,y:b3,platform:Linux,
 19000000010000000200000011000000,ODROID Go 2,a:b1,b:b0,dpdown:b9,dpleft:b10,dpright:b11,dpup:b8,guide:b12,leftshoulder:b4,leftstick:b14,lefttrigger:b13,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b15,righttrigger:b16,start:b17,x:b2,y:b3,platform:Linux,
-03000000c0160000dc27000001010000,OnyxSoft Dual JoyDivision,a:b0,b:b1,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b5,start:b6,x:b2,y:b3,platform:Linux,
 05000000362800000100000002010000,OUYA Controller,a:b0,b:b3,dpdown:b9,dpleft:b10,dpright:b11,dpup:b8,guide:b14,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,x:b1,y:b2,platform:Linux,
 05000000362800000100000003010000,OUYA Controller,a:b0,b:b3,dpdown:b9,dpleft:b10,dpright:b11,dpup:b8,guide:b14,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,x:b1,y:b2,platform:Linux,
 05000000362800000100000004010000,OUYA Controller,a:b0,b:b3,back:b14,dpdown:b9,dpleft:b10,dpright:b11,dpup:b8,leftshoulder:b4,leftstick:b6,lefttrigger:b12,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:b13,rightx:a3,righty:a4,start:b16,x:b1,y:b2,platform:Linux,
@@ -1494,6 +1522,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 030000009b2800004200000001010000,Raphnet Dual NES Adapter,a:b0,b:b1,back:b2,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,start:b3,platform:Linux,
 030000009b2800003200000001010000,Raphnet GC and N64 Adapter,a:b0,b:b7,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b2,righttrigger:b5,rightx:a3,righty:a4,start:b3,x:b1,y:b8,platform:Linux,
 030000009b2800006000000001010000,Raphnet GC and N64 Adapter,a:b0,b:b7,dpdown:b11,dpleft:b12,dpright:b13,dpup:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b2,righttrigger:b5,rightx:a3,righty:a4,start:b3,x:b1,y:b8,platform:Linux,
+030000009b2800008000000020020000,Raphnet Wii Classic Adapter,a:b1,b:b4,back:b2,dpdown:b13,dpleft:b14,dpright:b15,dpup:b12,leftshoulder:b6,rightshoulder:b7,start:b3,x:b0,y:b5,platform:Linux,
 03000000f8270000bf0b000011010000,Razer Kishi,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 030000008916000001fd000024010000,Razer Onza Classic Edition,a:b0,b:b1,back:b6,dpdown:b14,dpleft:b11,dpright:b12,dpup:b13,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 03000000321500000204000011010000,Razer Panthera PS3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
@@ -1565,6 +1594,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 05000000de2800000511000001000000,Steam Controller,a:b0,b:b1,back:b6,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,paddle1:b11,paddle2:b10,rightshoulder:b5,righttrigger:a3,start:b7,x:b2,y:b3,platform:Linux,
 05000000de2800000611000001000000,Steam Controller,a:b0,b:b1,back:b6,dpdown:b14,dpleft:b15,dpright:b13,dpup:b12,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,paddle1:b11,paddle2:b10,rightshoulder:b5,righttrigger:a3,start:b7,x:b2,y:b3,platform:Linux,
 03000000de2800000512000010010000,Steam Deck,a:b3,b:b4,back:b11,dpdown:b17,dpleft:b18,dpright:b19,dpup:b16,guide:b13,leftshoulder:b7,leftstick:b14,lefttrigger:a9,leftx:a0,lefty:a1,rightshoulder:b8,rightstick:b15,righttrigger:a8,rightx:a2,righty:a3,start:b12,x:b5,y:b6,platform:Linux,
+03000000de2800000512000011010000,Steam Deck,a:b3,b:b4,back:b11,dpdown:b17,dpleft:b18,dpright:b19,dpup:b16,guide:b13,leftshoulder:b7,leftstick:b14,lefttrigger:a9,leftx:a0,lefty:a1,misc1:b2,paddle1:b21,paddle2:b20,paddle3:b23,paddle4:b22,rightshoulder:b8,rightstick:b15,righttrigger:a8,rightx:a2,righty:a3,start:b12,x:b5,y:b6,platform:Linux,
 03000000de280000ff11000001000000,Steam Virtual Gamepad,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 050000004e696d6275732b0000000000,SteelSeries Nimbus Plus,a:b0,b:b1,back:b10,guide:b11,leftshoulder:b4,leftstick:b8,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b9,righttrigger:b7,rightx:a2,righty:a3,start:b12,x:b2,y:b3,platform:Linux,
 03000000381000003014000075010000,SteelSeries Stratus Duo,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
@@ -1664,11 +1694,15 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 060000005e040000120b000007050000,Xbox Series Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 060000005e040000120b00000b050000,Xbox Series Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 060000005e040000120b00000f050000,Xbox Series Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,misc1:b11,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
+050000005e040000130b000017050000,Xbox Series X Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,misc1:b15,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 060000005e040000120b00000d050000,Xbox Series X Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,misc1:b11,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 050000005e040000200b000013050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
+050000005e040000200b000017050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
+050000005e040000220b000017050000,Xbox Wireless Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b6,leftstick:b13,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a4,rightx:a2,righty:a3,start:b11,x:b3,y:b4,platform:Linux,
 03000000450c00002043000010010000,XEOX SL6556 BK,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
 05000000172700004431000029010000,XiaoMi Controller,a:b0,b:b1,back:b10,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b20,leftshoulder:b6,leftstick:b13,lefttrigger:a7,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b14,righttrigger:a6,rightx:a2,righty:a5,start:b11,x:b3,y:b4,platform:Linux,
 03000000c0160000e105000001010000,XinMo Dual Arcade,a:b4,b:b3,back:b6,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b9,leftshoulder:b2,leftx:a0,lefty:a1,rightshoulder:b5,start:b7,x:b1,y:b0,platform:Linux,
+030000005e0400008e02000020010000,XInput Adapter,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
 03000000120c0000100e000011010000,Zeroplus P4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 03000000120c0000101e000011010000,Zeroplus P4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
@@ -1755,6 +1789,7 @@ xinput,XInput Controller,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,
 63396666386564393334393236386630,8BitDo Zero 2,a:b1,b:b0,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftx:a0,lefty:a1,rightshoulder:b10,start:b6,x:b3,y:b2,platform:Android,
 63633435623263373466343461646430,8BitDo Zero 2,a:b1,b:b0,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftx:a0,lefty:a1,rightshoulder:b10,start:b6,x:b2,y:b3,platform:Android,
 32333634613735616163326165323731,Amazon Luna Controller,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,x:b2,y:b3,platform:Android,
+4c696e757820342e31392e3137322077,Anbernic Gamepad,a:b1,b:b0,back:b15,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:b17,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:b18,rightx:a2,righty:a4,start:b6,x:b2,y:b3,platform:Android,
 417374726f2063697479206d696e6920,Astro City Mini,a:b23,b:b22,back:b29,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,rightshoulder:b25,righttrigger:b26,start:b30,x:b24,y:b21,platform:Android,
 35643263313264386134376362363435,Atari VCS Classic Controller,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,start:b6,platform:Android,
 32353831643566306563643065356239,Atari VCS Modern Controller,a:b0,b:b1,back:b4,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b5,leftshoulder:b9,leftstick:b7,lefttrigger:a4,leftx:a0,lefty:a1,rightshoulder:b10,rightstick:b8,righttrigger:a5,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Android,
diff --git a/core/io/json.cpp b/core/io/json.cpp
index 496400a5ea..5a1fe45f70 100644
--- a/core/io/json.cpp
+++ b/core/io/json.cpp
@@ -86,7 +86,7 @@ String JSON::_stringify(const Variant &p_var, const String &p_indent, int p_cur_
 		case Variant::PACKED_STRING_ARRAY:
 		case Variant::ARRAY: {
 			Array a = p_var;
-			if (a.size() == 0) {
+			if (a.is_empty()) {
 				return "[]";
 			}
 			String s = "[";
@@ -95,12 +95,15 @@ String JSON::_stringify(const Variant &p_var, const String &p_indent, int p_cur_
 			ERR_FAIL_COND_V_MSG(p_markers.has(a.id()), "\"[...]\"", "Converting circular structure to JSON.");
 			p_markers.insert(a.id());
 
-			for (int i = 0; i < a.size(); i++) {
-				if (i > 0) {
+			bool first = true;
+			for (const Variant &var : a) {
+				if (first) {
+					first = false;
+				} else {
 					s += ",";
 					s += end_statement;
 				}
-				s += _make_indent(p_indent, p_cur_indent + 1) + _stringify(a[i], p_indent, p_cur_indent + 1, p_sort_keys, p_markers);
+				s += _make_indent(p_indent, p_cur_indent + 1) + _stringify(var, p_indent, p_cur_indent + 1, p_sort_keys, p_markers);
 			}
 			s += end_statement + _make_indent(p_indent, p_cur_indent) + "]";
 			p_markers.erase(a.id());
diff --git a/core/io/marshalls.cpp b/core/io/marshalls.cpp
index b25fcccd7f..4487b8e472 100644
--- a/core/io/marshalls.cpp
+++ b/core/io/marshalls.cpp
@@ -687,7 +687,7 @@ Error decode_variant(Variant &r_variant, const uint8_t *p_buffer, int p_len, int
 							(*r_len) += used;
 						}
 
-						if (str == "script") {
+						if (str == "script" && value.get_type() != Variant::NIL) {
 							ERR_FAIL_COND_V_MSG(value.get_type() != Variant::STRING, ERR_INVALID_DATA, "Invalid value for \"script\" property, expected script path as String.");
 							String path = value;
 							ERR_FAIL_COND_V_MSG(path.is_empty() || !path.begins_with("res://") || !ResourceLoader::exists(path, "Script"), ERR_INVALID_DATA, "Invalid script path: '" + path + "'.");
@@ -1729,9 +1729,9 @@ Error encode_variant(const Variant &p_variant, uint8_t *r_buffer, int &r_len, bo
 			}
 			r_len += 4;
 
-			for (int i = 0; i < array.size(); i++) {
+			for (const Variant &var : array) {
 				int len;
-				Error err = encode_variant(array.get(i), buf, len, p_full_objects, p_depth + 1);
+				Error err = encode_variant(var, buf, len, p_full_objects, p_depth + 1);
 				ERR_FAIL_COND_V(err, err);
 				ERR_FAIL_COND_V(len % 4, ERR_BUG);
 				if (buf) {
diff --git a/core/io/resource_format_binary.cpp b/core/io/resource_format_binary.cpp
index 17cffb878e..d0a8200546 100644
--- a/core/io/resource_format_binary.cpp
+++ b/core/io/resource_format_binary.cpp
@@ -1844,8 +1844,8 @@ void ResourceFormatSaverBinaryInstance::write_variant(Ref<FileAccess> f, const V
 			f->store_32(VARIANT_ARRAY);
 			Array a = p_property;
 			f->store_32(uint32_t(a.size()));
-			for (int i = 0; i < a.size(); i++) {
-				write_variant(f, a[i], resource_map, external_resources, string_map);
+			for (const Variant &var : a) {
+				write_variant(f, var, resource_map, external_resources, string_map);
 			}
 
 		} break;
@@ -2016,9 +2016,8 @@ void ResourceFormatSaverBinaryInstance::_find_resources(const Variant &p_variant
 
 		case Variant::ARRAY: {
 			Array varray = p_variant;
-			int len = varray.size();
-			for (int i = 0; i < len; i++) {
-				const Variant &v = varray.get(i);
+			_find_resources(varray.get_typed_script());
+			for (const Variant &v : varray) {
 				_find_resources(v);
 			}
 
diff --git a/core/io/resource_loader.cpp b/core/io/resource_loader.cpp
index 191abee315..eea6357084 100644
--- a/core/io/resource_loader.cpp
+++ b/core/io/resource_loader.cpp
@@ -1035,8 +1035,9 @@ void ResourceLoader::load_translation_remaps() {
 		Array langs = remaps[E];
 		Vector<String> lang_remaps;
 		lang_remaps.resize(langs.size());
-		for (int i = 0; i < langs.size(); i++) {
-			lang_remaps.write[i] = langs[i];
+		String *lang_remaps_ptrw = lang_remaps.ptrw();
+		for (const Variant &lang : langs) {
+			*lang_remaps_ptrw++ = lang;
 		}
 
 		translation_remaps[String(E)] = lang_remaps;
diff --git a/core/object/object.cpp b/core/object/object.cpp
index 8b6fd587e0..06f6e8e9e6 100644
--- a/core/object/object.cpp
+++ b/core/object/object.cpp
@@ -142,16 +142,16 @@ MethodInfo MethodInfo::from_dict(const Dictionary &p_dict) {
 		args = p_dict["args"];
 	}
 
-	for (int i = 0; i < args.size(); i++) {
-		Dictionary d = args[i];
+	for (const Variant &arg : args) {
+		Dictionary d = arg;
 		mi.arguments.push_back(PropertyInfo::from_dict(d));
 	}
 	Array defargs;
 	if (p_dict.has("default_args")) {
 		defargs = p_dict["default_args"];
 	}
-	for (int i = 0; i < defargs.size(); i++) {
-		mi.default_arguments.push_back(defargs[i]);
+	for (const Variant &defarg : defargs) {
+		mi.default_arguments.push_back(defarg);
 	}
 
 	if (p_dict.has("return")) {
@@ -1233,8 +1233,8 @@ void Object::_add_user_signal(const String &p_name, const Array &p_args) {
 	MethodInfo mi;
 	mi.name = p_name;
 
-	for (int i = 0; i < p_args.size(); i++) {
-		Dictionary d = p_args[i];
+	for (const Variant &arg : p_args) {
+		Dictionary d = arg;
 		PropertyInfo param;
 
 		if (d.has("name")) {
@@ -1585,8 +1585,8 @@ void Object::_clear_internal_resource_paths(const Variant &p_var) {
 		} break;
 		case Variant::ARRAY: {
 			Array a = p_var;
-			for (int i = 0; i < a.size(); i++) {
-				_clear_internal_resource_paths(a[i]);
+			for (const Variant &var : a) {
+				_clear_internal_resource_paths(var);
 			}
 
 		} break;
diff --git a/core/object/script_language.cpp b/core/object/script_language.cpp
index 73da0ba2af..660e13e819 100644
--- a/core/object/script_language.cpp
+++ b/core/object/script_language.cpp
@@ -251,8 +251,8 @@ void ScriptServer::init_languages() {
 		if (ProjectSettings::get_singleton()->has_setting("_global_script_classes")) {
 			Array script_classes = GLOBAL_GET("_global_script_classes");
 
-			for (int i = 0; i < script_classes.size(); i++) {
-				Dictionary c = script_classes[i];
+			for (const Variant &script_class : script_classes) {
+				Dictionary c = script_class;
 				if (!c.has("class") || !c.has("language") || !c.has("path") || !c.has("base")) {
 					continue;
 				}
@@ -263,8 +263,8 @@ void ScriptServer::init_languages() {
 #endif
 
 		Array script_classes = ProjectSettings::get_singleton()->get_global_class_list();
-		for (int i = 0; i < script_classes.size(); i++) {
-			Dictionary c = script_classes[i];
+		for (const Variant &script_class : script_classes) {
+			Dictionary c = script_class;
 			if (!c.has("class") || !c.has("language") || !c.has("path") || !c.has("base")) {
 				continue;
 			}
@@ -463,8 +463,8 @@ void ScriptServer::save_global_classes() {
 	Dictionary class_icons;
 
 	Array script_classes = ProjectSettings::get_singleton()->get_global_class_list();
-	for (int i = 0; i < script_classes.size(); i++) {
-		Dictionary d = script_classes[i];
+	for (const Variant &script_class : script_classes) {
+		Dictionary d = script_class;
 		if (!d.has("name") || !d.has("icon")) {
 			continue;
 		}
diff --git a/core/object/script_language_extension.h b/core/object/script_language_extension.h
index 1db322526d..cc6b729ae8 100644
--- a/core/object/script_language_extension.h
+++ b/core/object/script_language_extension.h
@@ -319,8 +319,8 @@ public:
 		}
 		if (r_errors != nullptr && ret.has("errors")) {
 			Array errors = ret["errors"];
-			for (int i = 0; i < errors.size(); i++) {
-				Dictionary err = errors[i];
+			for (const Variant &error : errors) {
+				Dictionary err = error;
 				ERR_CONTINUE(!err.has("line"));
 				ERR_CONTINUE(!err.has("column"));
 				ERR_CONTINUE(!err.has("message"));
@@ -339,8 +339,8 @@ public:
 		if (r_warnings != nullptr && ret.has("warnings")) {
 			ERR_FAIL_COND_V(!ret.has("warnings"), false);
 			Array warnings = ret["warnings"];
-			for (int i = 0; i < warnings.size(); i++) {
-				Dictionary warn = warnings[i];
+			for (const Variant &warning : warnings) {
+				Dictionary warn = warning;
 				ERR_CONTINUE(!warn.has("start_line"));
 				ERR_CONTINUE(!warn.has("end_line"));
 				ERR_CONTINUE(!warn.has("leftmost_column"));
@@ -402,8 +402,8 @@ public:
 
 		if (r_options != nullptr && ret.has("options")) {
 			Array options = ret["options"];
-			for (int i = 0; i < options.size(); i++) {
-				Dictionary op = options[i];
+			for (const Variant &var : options) {
+				Dictionary op = var;
 				CodeCompletionOption option;
 				ERR_CONTINUE(!op.has("kind"));
 				option.kind = CodeCompletionKind(int(op["kind"]));
@@ -502,8 +502,8 @@ public:
 		}
 		if (p_values != nullptr && ret.has("values")) {
 			Array values = ret["values"];
-			for (int i = 0; i < values.size(); i++) {
-				p_values->push_back(values[i]);
+			for (const Variant &value : values) {
+				p_values->push_back(value);
 			}
 		}
 	}
@@ -522,8 +522,8 @@ public:
 		}
 		if (p_values != nullptr && ret.has("values")) {
 			Array values = ret["values"];
-			for (int i = 0; i < values.size(); i++) {
-				p_values->push_back(values[i]);
+			for (const Variant &value : values) {
+				p_values->push_back(value);
 			}
 		}
 	}
@@ -549,8 +549,8 @@ public:
 		}
 		if (p_values != nullptr && ret.has("values")) {
 			Array values = ret["values"];
-			for (int i = 0; i < values.size(); i++) {
-				p_values->push_back(values[i]);
+			for (const Variant &value : values) {
+				p_values->push_back(value);
 			}
 		}
 	}
@@ -562,9 +562,9 @@ public:
 		TypedArray<Dictionary> ret;
 		GDVIRTUAL_REQUIRED_CALL(_debug_get_current_stack_info, ret);
 		Vector<StackInfo> sret;
-		for (int i = 0; i < ret.size(); i++) {
+		for (const Variant &var : ret) {
 			StackInfo si;
-			Dictionary d = ret[i];
+			Dictionary d = var;
 			ERR_CONTINUE(!d.has("file"));
 			ERR_CONTINUE(!d.has("func"));
 			ERR_CONTINUE(!d.has("line"));
@@ -595,8 +595,8 @@ public:
 	virtual void get_public_functions(List<MethodInfo> *p_functions) const override {
 		TypedArray<Dictionary> ret;
 		GDVIRTUAL_REQUIRED_CALL(_get_public_functions, ret);
-		for (int i = 0; i < ret.size(); i++) {
-			MethodInfo mi = MethodInfo::from_dict(ret[i]);
+		for (const Variant &var : ret) {
+			MethodInfo mi = MethodInfo::from_dict(var);
 			p_functions->push_back(mi);
 		}
 	}
@@ -615,8 +615,8 @@ public:
 	virtual void get_public_annotations(List<MethodInfo> *p_annotations) const override {
 		TypedArray<Dictionary> ret;
 		GDVIRTUAL_REQUIRED_CALL(_get_public_annotations, ret);
-		for (int i = 0; i < ret.size(); i++) {
-			MethodInfo mi = MethodInfo::from_dict(ret[i]);
+		for (const Variant &var : ret) {
+			MethodInfo mi = MethodInfo::from_dict(var);
 			p_annotations->push_back(mi);
 		}
 	}
diff --git a/core/os/os.h b/core/os/os.h
index 370e724c53..3827bb273a 100644
--- a/core/os/os.h
+++ b/core/os/os.h
@@ -153,7 +153,7 @@ public:
 
 	virtual void alert(const String &p_alert, const String &p_title = "ALERT!");
 
-	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr) { return ERR_UNAVAILABLE; }
+	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr, bool p_generate_temp_files = false) { return ERR_UNAVAILABLE; }
 	virtual Error close_dynamic_library(void *p_library_handle) { return ERR_UNAVAILABLE; }
 	virtual Error get_dynamic_library_symbol_handle(void *p_library_handle, const String &p_name, void *&p_symbol_handle, bool p_optional = false) { return ERR_UNAVAILABLE; }
 
diff --git a/core/variant/array.cpp b/core/variant/array.cpp
index 5d6fbb8bed..3685515db5 100644
--- a/core/variant/array.cpp
+++ b/core/variant/array.cpp
@@ -81,6 +81,22 @@ void Array::_unref() const {
 	_p = nullptr;
 }
 
+Array::Iterator Array::begin() {
+	return Iterator(_p->array.ptrw(), _p->read_only);
+}
+
+Array::Iterator Array::end() {
+	return Iterator(_p->array.ptrw() + _p->array.size(), _p->read_only);
+}
+
+Array::ConstIterator Array::begin() const {
+	return ConstIterator(_p->array.ptr(), _p->read_only);
+}
+
+Array::ConstIterator Array::end() const {
+	return ConstIterator(_p->array.ptr() + _p->array.size(), _p->read_only);
+}
+
 Variant &Array::operator[](int p_idx) {
 	if (unlikely(_p->read_only)) {
 		*_p->read_only = _p->array[p_idx];
diff --git a/core/variant/array.h b/core/variant/array.h
index 8b1f8c0678..d45a6887e2 100644
--- a/core/variant/array.h
+++ b/core/variant/array.h
@@ -46,6 +46,70 @@ class Array {
 	void _unref() const;
 
 public:
+	struct ConstIterator {
+		_FORCE_INLINE_ const Variant &operator*() const;
+		_FORCE_INLINE_ const Variant *operator->() const;
+
+		_FORCE_INLINE_ ConstIterator &operator++();
+		_FORCE_INLINE_ ConstIterator &operator--();
+
+		_FORCE_INLINE_ bool operator==(const ConstIterator &p_other) const { return element_ptr == p_other.element_ptr; }
+		_FORCE_INLINE_ bool operator!=(const ConstIterator &p_other) const { return element_ptr == p_other.element_ptr; }
+
+		_FORCE_INLINE_ ConstIterator(const Variant *p_element_ptr, Variant *p_read_only = nullptr) :
+				element_ptr(p_element_ptr), read_only(p_read_only) {}
+		_FORCE_INLINE_ ConstIterator() {}
+		_FORCE_INLINE_ ConstIterator(const ConstIterator &p_other) :
+				element_ptr(p_other.element_ptr), read_only(p_other.read_only) {}
+
+		_FORCE_INLINE_ ConstIterator &operator=(const ConstIterator &p_other) {
+			element_ptr = p_other.element_ptr;
+			read_only = p_other.read_only;
+			return *this;
+		}
+
+	private:
+		const Variant *element_ptr = nullptr;
+		Variant *read_only = nullptr;
+	};
+
+	struct Iterator {
+		_FORCE_INLINE_ Variant &operator*() const;
+		_FORCE_INLINE_ Variant *operator->() const;
+
+		_FORCE_INLINE_ Iterator &operator++();
+		_FORCE_INLINE_ Iterator &operator--();
+
+		_FORCE_INLINE_ bool operator==(const Iterator &p_other) const { return element_ptr == p_other.element_ptr; }
+		_FORCE_INLINE_ bool operator!=(const Iterator &p_other) const { return element_ptr != p_other.element_ptr; }
+
+		_FORCE_INLINE_ Iterator(Variant *p_element_ptr, Variant *p_read_only = nullptr) :
+				element_ptr(p_element_ptr), read_only(p_read_only) {}
+		_FORCE_INLINE_ Iterator() {}
+		_FORCE_INLINE_ Iterator(const Iterator &p_other) :
+				element_ptr(p_other.element_ptr), read_only(p_other.read_only) {}
+
+		_FORCE_INLINE_ Iterator &operator=(const Iterator &p_other) {
+			element_ptr = p_other.element_ptr;
+			read_only = p_other.read_only;
+			return *this;
+		}
+
+		operator ConstIterator() const {
+			return ConstIterator(element_ptr, read_only);
+		}
+
+	private:
+		Variant *element_ptr = nullptr;
+		Variant *read_only = nullptr;
+	};
+
+	Iterator begin();
+	Iterator end();
+
+	ConstIterator begin() const;
+	ConstIterator end() const;
+
 	void _ref(const Array &p_from) const;
 
 	Variant &operator[](int p_idx);
diff --git a/core/variant/variant.h b/core/variant/variant.h
index fc4030ac5a..10f8dc3c7f 100644
--- a/core/variant/variant.h
+++ b/core/variant/variant.h
@@ -865,4 +865,56 @@ Callable Callable::bind(VarArgs... p_args) const {
 	return bindp(sizeof...(p_args) == 0 ? nullptr : (const Variant **)argptrs, sizeof...(p_args));
 }
 
+Variant &Array::Iterator::operator*() const {
+	if (unlikely(read_only)) {
+		*read_only = *element_ptr;
+		return *read_only;
+	}
+	return *element_ptr;
+}
+
+Variant *Array::Iterator::operator->() const {
+	if (unlikely(read_only)) {
+		*read_only = *element_ptr;
+		return read_only;
+	}
+	return element_ptr;
+}
+
+Array::Iterator &Array::Iterator::operator++() {
+	element_ptr++;
+	return *this;
+}
+
+Array::Iterator &Array::Iterator::operator--() {
+	element_ptr--;
+	return *this;
+}
+
+const Variant &Array::ConstIterator::operator*() const {
+	if (unlikely(read_only)) {
+		*read_only = *element_ptr;
+		return *read_only;
+	}
+	return *element_ptr;
+}
+
+const Variant *Array::ConstIterator::operator->() const {
+	if (unlikely(read_only)) {
+		*read_only = *element_ptr;
+		return read_only;
+	}
+	return element_ptr;
+}
+
+Array::ConstIterator &Array::ConstIterator::operator++() {
+	element_ptr++;
+	return *this;
+}
+
+Array::ConstIterator &Array::ConstIterator::operator--() {
+	element_ptr--;
+	return *this;
+}
+
 #endif // VARIANT_H
diff --git a/core/variant/variant_call.cpp b/core/variant/variant_call.cpp
index ba7c44e405..d0d940c47d 100644
--- a/core/variant/variant_call.cpp
+++ b/core/variant/variant_call.cpp
@@ -1799,7 +1799,7 @@ static void _register_variant_builtin_methods() {
 	bind_method(Vector2, dot, sarray("with"), varray());
 	bind_method(Vector2, slide, sarray("n"), varray());
 	bind_method(Vector2, bounce, sarray("n"), varray());
-	bind_method(Vector2, reflect, sarray("n"), varray());
+	bind_method(Vector2, reflect, sarray("line"), varray());
 	bind_method(Vector2, cross, sarray("with"), varray());
 	bind_method(Vector2, abs, sarray(), varray());
 	bind_method(Vector2, sign, sarray(), varray());
@@ -1896,7 +1896,7 @@ static void _register_variant_builtin_methods() {
 	bind_method(Vector3, project, sarray("b"), varray());
 	bind_method(Vector3, slide, sarray("n"), varray());
 	bind_method(Vector3, bounce, sarray("n"), varray());
-	bind_method(Vector3, reflect, sarray("n"), varray());
+	bind_method(Vector3, reflect, sarray("direction"), varray());
 	bind_method(Vector3, sign, sarray(), varray());
 	bind_method(Vector3, octahedron_encode, sarray(), varray());
 	bind_static_method(Vector3, octahedron_decode, sarray("uv"), varray());
diff --git a/core/variant/variant_parser.cpp b/core/variant/variant_parser.cpp
index 46c450d9f8..dcb94b16b1 100644
--- a/core/variant/variant_parser.cpp
+++ b/core/variant/variant_parser.cpp
@@ -30,6 +30,7 @@
 
 #include "variant_parser.h"
 
+#include "core/crypto/crypto_core.h"
 #include "core/input/input_event.h"
 #include "core/io/resource_loader.h"
 #include "core/object/script_language.h"
@@ -595,6 +596,82 @@ Error VariantParser::_parse_construct(Stream *p_stream, Vector<T> &r_construct,
 	return OK;
 }
 
+Error VariantParser::_parse_byte_array(Stream *p_stream, Vector<uint8_t> &r_construct, int &line, String &r_err_str) {
+	Token token;
+	get_token(p_stream, token, line, r_err_str);
+	if (token.type != TK_PARENTHESIS_OPEN) {
+		r_err_str = "Expected '(' in constructor";
+		return ERR_PARSE_ERROR;
+	}
+
+	get_token(p_stream, token, line, r_err_str);
+	if (token.type == TK_STRING) {
+		// Base64 encoded array.
+		String base64_encoded_string = token.value;
+		int strlen = base64_encoded_string.length();
+		CharString cstr = base64_encoded_string.ascii();
+
+		size_t arr_len = 0;
+		r_construct.resize(strlen / 4 * 3 + 1);
+		uint8_t *w = r_construct.ptrw();
+		Error err = CryptoCore::b64_decode(&w[0], r_construct.size(), &arr_len, (unsigned char *)cstr.get_data(), strlen);
+		if (err) {
+			r_err_str = "Invalid base64-encoded string";
+			return ERR_PARSE_ERROR;
+		}
+		r_construct.resize(arr_len);
+
+		get_token(p_stream, token, line, r_err_str);
+		if (token.type != TK_PARENTHESIS_CLOSE) {
+			r_err_str = "Expected ')' in constructor";
+			return ERR_PARSE_ERROR;
+		}
+
+	} else if (token.type == TK_NUMBER || token.type == TK_IDENTIFIER) {
+		// Individual elements.
+		while (true) {
+			if (token.type != TK_NUMBER) {
+				bool valid = false;
+				if (token.type == TK_IDENTIFIER) {
+					double real = stor_fix(token.value);
+					if (real != -1) {
+						token.type = TK_NUMBER;
+						token.value = real;
+						valid = true;
+					}
+				}
+				if (!valid) {
+					r_err_str = "Expected number in constructor";
+					return ERR_PARSE_ERROR;
+				}
+			}
+
+			r_construct.push_back(token.value);
+
+			get_token(p_stream, token, line, r_err_str);
+
+			if (token.type == TK_COMMA) {
+				//do none
+			} else if (token.type == TK_PARENTHESIS_CLOSE) {
+				break;
+			} else {
+				r_err_str = "Expected ',' or ')' in constructor";
+				return ERR_PARSE_ERROR;
+			}
+
+			get_token(p_stream, token, line, r_err_str);
+		}
+	} else if (token.type == TK_PARENTHESIS_CLOSE) {
+		// Empty array.
+		return OK;
+	} else {
+		r_err_str = "Expected base64 string, or list of numbers in constructor";
+		return ERR_PARSE_ERROR;
+	}
+
+	return OK;
+}
+
 Error VariantParser::parse_value(Token &token, Variant &value, Stream *p_stream, int &line, String &r_err_str, ResourceParser *p_res_parser) {
 	if (token.type == TK_CURLY_BRACKET_OPEN) {
 		Dictionary d;
@@ -1027,7 +1104,7 @@ Error VariantParser::parse_value(Token &token, Variant &value, Stream *p_stream,
 				Error err = p_res_parser->ext_func(p_res_parser->userdata, p_stream, res, line, r_err_str);
 				if (err) {
 					// If the file is missing, the error can be ignored.
-					if (err != ERR_FILE_NOT_FOUND && err != ERR_CANT_OPEN) {
+					if (err != ERR_FILE_NOT_FOUND && err != ERR_CANT_OPEN && err != ERR_FILE_CANT_OPEN) {
 						return err;
 					}
 				}
@@ -1148,7 +1225,7 @@ Error VariantParser::parse_value(Token &token, Variant &value, Stream *p_stream,
 			value = array;
 		} else if (id == "PackedByteArray" || id == "PoolByteArray" || id == "ByteArray") {
 			Vector<uint8_t> args;
-			Error err = _parse_construct<uint8_t>(p_stream, args, line, r_err_str);
+			Error err = _parse_byte_array(p_stream, args, line, r_err_str);
 			if (err) {
 				return err;
 			}
@@ -2012,12 +2089,14 @@ Error VariantWriter::write(const Variant &p_variant, StoreStringFunc p_store_str
 				p_recursion_count++;
 
 				p_store_string_func(p_store_string_ud, "[");
-				int len = array.size();
-				for (int i = 0; i < len; i++) {
-					if (i > 0) {
+				bool first = true;
+				for (const Variant &var : array) {
+					if (first) {
+						first = false;
+					} else {
 						p_store_string_func(p_store_string_ud, ", ");
 					}
-					write(array[i], p_store_string_func, p_store_string_ud, p_encode_res_func, p_encode_res_ud, p_recursion_count);
+					write(var, p_store_string_func, p_store_string_ud, p_encode_res_func, p_encode_res_ud, p_recursion_count);
 				}
 
 				p_store_string_func(p_store_string_ud, "]");
@@ -2031,17 +2110,11 @@ Error VariantWriter::write(const Variant &p_variant, StoreStringFunc p_store_str
 		case Variant::PACKED_BYTE_ARRAY: {
 			p_store_string_func(p_store_string_ud, "PackedByteArray(");
 			Vector<uint8_t> data = p_variant;
-			int len = data.size();
-			const uint8_t *ptr = data.ptr();
-
-			for (int i = 0; i < len; i++) {
-				if (i > 0) {
-					p_store_string_func(p_store_string_ud, ", ");
-				}
-
-				p_store_string_func(p_store_string_ud, itos(ptr[i]));
+			if (data.size() > 0) {
+				p_store_string_func(p_store_string_ud, "\"");
+				p_store_string_func(p_store_string_ud, CryptoCore::b64_encode_str(data.ptr(), data.size()));
+				p_store_string_func(p_store_string_ud, "\"");
 			}
-
 			p_store_string_func(p_store_string_ud, ")");
 		} break;
 		case Variant::PACKED_INT32_ARRAY: {
diff --git a/core/variant/variant_parser.h b/core/variant/variant_parser.h
index 18448100e0..2f8974849f 100644
--- a/core/variant/variant_parser.h
+++ b/core/variant/variant_parser.h
@@ -141,6 +141,7 @@ private:
 
 	template <typename T>
 	static Error _parse_construct(Stream *p_stream, Vector<T> &r_construct, int &line, String &r_err_str);
+	static Error _parse_byte_array(Stream *p_stream, Vector<uint8_t> &r_construct, int &line, String &r_err_str);
 	static Error _parse_enginecfg(Stream *p_stream, Vector<String> &strings, int &line, String &r_err_str);
 	static Error _parse_dictionary(Dictionary &object, Stream *p_stream, int &line, String &r_err_str, ResourceParser *p_res_parser = nullptr);
 	static Error _parse_array(Array &array, Stream *p_stream, int &line, String &r_err_str, ResourceParser *p_res_parser = nullptr);
diff --git a/doc/classes/@GlobalScope.xml b/doc/classes/@GlobalScope.xml
index 3a1bd83c18..6d8517927c 100644
--- a/doc/classes/@GlobalScope.xml
+++ b/doc/classes/@GlobalScope.xml
@@ -368,7 +368,7 @@
 			<param index="1" name="curve" type="float" />
 			<description>
 				Returns an "eased" value of [param x] based on an easing function defined with [param curve]. This easing function is based on an exponent. The [param curve] can be any floating-point number, with specific values leading to the following behaviors:
-				[codeblock]
+				[codeblock lang=text]
 				- Lower than -1.0 (exclusive): Ease in-out
 				- 1.0: Linear
 				- Between -1.0 and 0.0 (exclusive): Ease out-in
@@ -460,8 +460,8 @@
 				    var x = i * 0.5 - 1.5
 				    print("%4.1f           %4.1f  | %4.1f" % [x, fmod(x, 1.5), fposmod(x, 1.5)])
 				[/codeblock]
-				Produces:
-				[codeblock]
+				Prints:
+				[codeblock lang=text]
 				 (x)  (fmod(x, 1.5))   (fposmod(x, 1.5))
 				-1.5           -0.0  |  0.0
 				-1.0           -1.0  |  0.5
@@ -818,8 +818,8 @@
 				for i in range(-3, 4):
 				    print("%2d       %2d  | %2d" % [i, i % 3, posmod(i, 3)])
 				[/codeblock]
-				Produces:
-				[codeblock]
+				Prints:
+				[codeblock lang=text]
 				(i)  (i % 3)   (posmod(i, 3))
 				-3        0  |  0
 				-2       -2  |  1
@@ -1457,7 +1457,7 @@
 				[/csharp]
 				[/codeblocks]
 				Prints:
-				[codeblock]
+				[codeblock lang=text]
 				{
 				    "a": 1,
 				    "b": 2
diff --git a/doc/classes/AnimatableBody3D.xml b/doc/classes/AnimatableBody3D.xml
index f5c6217477..f888ca1416 100644
--- a/doc/classes/AnimatableBody3D.xml
+++ b/doc/classes/AnimatableBody3D.xml
@@ -8,9 +8,9 @@
 		When [AnimatableBody3D] is moved, its linear and angular velocity are estimated and used to affect other physics bodies in its path. This makes it useful for moving platforms, doors, and other moving objects.
 	</description>
 	<tutorials>
-		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/2747</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<members>
 		<member name="sync_to_physics" type="bool" setter="set_sync_to_physics" getter="is_sync_to_physics_enabled" default="true">
diff --git a/doc/classes/AnimatedSprite2D.xml b/doc/classes/AnimatedSprite2D.xml
index 4b38773505..012ae4fe29 100644
--- a/doc/classes/AnimatedSprite2D.xml
+++ b/doc/classes/AnimatedSprite2D.xml
@@ -8,7 +8,7 @@
 	</description>
 	<tutorials>
 		<link title="2D Sprite animation">$DOCS_URL/tutorials/2d/2d_sprite_animation.html</link>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
 	</tutorials>
 	<methods>
 		<method name="get_playing_speed" qualifiers="const">
diff --git a/doc/classes/AnimationNodeAdd3.xml b/doc/classes/AnimationNodeAdd3.xml
index 12f00849cc..edd733ad34 100644
--- a/doc/classes/AnimationNodeAdd3.xml
+++ b/doc/classes/AnimationNodeAdd3.xml
@@ -13,6 +13,6 @@
 	</description>
 	<tutorials>
 		<link title="Using AnimationTree">$DOCS_URL/tutorials/animation/animation_tree.html</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 </class>
diff --git a/doc/classes/AnimationNodeAnimation.xml b/doc/classes/AnimationNodeAnimation.xml
index 5683371182..0c85e8e670 100644
--- a/doc/classes/AnimationNodeAnimation.xml
+++ b/doc/classes/AnimationNodeAnimation.xml
@@ -8,8 +8,8 @@
 	</description>
 	<tutorials>
 		<link title="Using AnimationTree">$DOCS_URL/tutorials/animation/animation_tree.html</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<members>
 		<member name="animation" type="StringName" setter="set_animation" getter="get_animation" default="&amp;&quot;&quot;">
diff --git a/doc/classes/AnimationNodeBlend2.xml b/doc/classes/AnimationNodeBlend2.xml
index 61df0b26b3..50d785fee7 100644
--- a/doc/classes/AnimationNodeBlend2.xml
+++ b/doc/classes/AnimationNodeBlend2.xml
@@ -9,7 +9,7 @@
 	</description>
 	<tutorials>
 		<link title="Using AnimationTree">$DOCS_URL/tutorials/animation/animation_tree.html</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 </class>
diff --git a/doc/classes/AnimationNodeBlendSpace2D.xml b/doc/classes/AnimationNodeBlendSpace2D.xml
index 47933a1fd0..596767599c 100644
--- a/doc/classes/AnimationNodeBlendSpace2D.xml
+++ b/doc/classes/AnimationNodeBlendSpace2D.xml
@@ -10,7 +10,7 @@
 	</description>
 	<tutorials>
 		<link title="Using AnimationTree">$DOCS_URL/tutorials/animation/animation_tree.html</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="add_blend_point">
diff --git a/doc/classes/AnimationNodeOneShot.xml b/doc/classes/AnimationNodeOneShot.xml
index 6ff2d6f6db..b2a8002d74 100644
--- a/doc/classes/AnimationNodeOneShot.xml
+++ b/doc/classes/AnimationNodeOneShot.xml
@@ -53,7 +53,7 @@
 	</description>
 	<tutorials>
 		<link title="Using AnimationTree">$DOCS_URL/tutorials/animation/animation_tree.html</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<members>
 		<member name="autorestart" type="bool" setter="set_autorestart" getter="has_autorestart" default="false">
diff --git a/doc/classes/AnimationNodeOutput.xml b/doc/classes/AnimationNodeOutput.xml
index f957650294..6186fdd8e1 100644
--- a/doc/classes/AnimationNodeOutput.xml
+++ b/doc/classes/AnimationNodeOutput.xml
@@ -8,7 +8,7 @@
 	</description>
 	<tutorials>
 		<link title="Using AnimationTree">$DOCS_URL/tutorials/animation/animation_tree.html</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 </class>
diff --git a/doc/classes/AnimationNodeTimeScale.xml b/doc/classes/AnimationNodeTimeScale.xml
index 4cb8ccb962..9bf7529124 100644
--- a/doc/classes/AnimationNodeTimeScale.xml
+++ b/doc/classes/AnimationNodeTimeScale.xml
@@ -8,6 +8,6 @@
 	</description>
 	<tutorials>
 		<link title="Using AnimationTree">$DOCS_URL/tutorials/animation/animation_tree.html</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
 	</tutorials>
 </class>
diff --git a/doc/classes/AnimationNodeTransition.xml b/doc/classes/AnimationNodeTransition.xml
index 775a208735..382166d823 100644
--- a/doc/classes/AnimationNodeTransition.xml
+++ b/doc/classes/AnimationNodeTransition.xml
@@ -38,8 +38,8 @@
 	</description>
 	<tutorials>
 		<link title="Using AnimationTree">$DOCS_URL/tutorials/animation/animation_tree.html</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="is_input_loop_broken_at_end" qualifiers="const">
diff --git a/doc/classes/AnimationPlayer.xml b/doc/classes/AnimationPlayer.xml
index 9d2d93b0f0..1b742bea28 100644
--- a/doc/classes/AnimationPlayer.xml
+++ b/doc/classes/AnimationPlayer.xml
@@ -12,7 +12,7 @@
 	<tutorials>
 		<link title="2D Sprite animation">$DOCS_URL/tutorials/2d/2d_sprite_animation.html</link>
 		<link title="Animation documentation index">$DOCS_URL/tutorials/animation/index.html</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="animation_get_next" qualifiers="const">
diff --git a/doc/classes/AnimationTree.xml b/doc/classes/AnimationTree.xml
index d240a4967e..4a63b35ba0 100644
--- a/doc/classes/AnimationTree.xml
+++ b/doc/classes/AnimationTree.xml
@@ -9,7 +9,7 @@
 	</description>
 	<tutorials>
 		<link title="Using AnimationTree">$DOCS_URL/tutorials/animation/animation_tree.html</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="get_process_callback" qualifiers="const" deprecated="Use [member AnimationMixer.callback_mode_process] instead.">
diff --git a/doc/classes/Area2D.xml b/doc/classes/Area2D.xml
index a52aa80606..4ad5db2b67 100644
--- a/doc/classes/Area2D.xml
+++ b/doc/classes/Area2D.xml
@@ -10,9 +10,9 @@
 	</description>
 	<tutorials>
 		<link title="Using Area2D">$DOCS_URL/tutorials/physics/using_area_2d.html</link>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
-		<link title="2D Pong Demo">https://godotengine.org/asset-library/asset/121</link>
-		<link title="2D Platformer Demo">https://godotengine.org/asset-library/asset/120</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
+		<link title="2D Pong Demo">https://godotengine.org/asset-library/asset/2728</link>
+		<link title="2D Platformer Demo">https://godotengine.org/asset-library/asset/2727</link>
 	</tutorials>
 	<methods>
 		<method name="get_overlapping_areas" qualifiers="const">
diff --git a/doc/classes/Area3D.xml b/doc/classes/Area3D.xml
index 671c824c30..8eedd3cdf2 100644
--- a/doc/classes/Area3D.xml
+++ b/doc/classes/Area3D.xml
@@ -11,8 +11,8 @@
 	</description>
 	<tutorials>
 		<link title="Using Area2D">$DOCS_URL/tutorials/physics/using_area_2d.html</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="GUI in 3D Demo">https://godotengine.org/asset-library/asset/127</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="GUI in 3D Viewport Demo">https://godotengine.org/asset-library/asset/2807</link>
 	</tutorials>
 	<methods>
 		<method name="get_overlapping_areas" qualifiers="const">
@@ -124,12 +124,15 @@
 		</member>
 		<member name="wind_attenuation_factor" type="float" setter="set_wind_attenuation_factor" getter="get_wind_attenuation_factor" default="0.0">
 			The exponential rate at which wind force decreases with distance from its origin.
+			[b]Note:[/b] This wind force only applies to [SoftBody3D] nodes. Other physics bodies are currently not affected by wind.
 		</member>
 		<member name="wind_force_magnitude" type="float" setter="set_wind_force_magnitude" getter="get_wind_force_magnitude" default="0.0">
 			The magnitude of area-specific wind force.
+			[b]Note:[/b] This wind force only applies to [SoftBody3D] nodes. Other physics bodies are currently not affected by wind.
 		</member>
 		<member name="wind_source_path" type="NodePath" setter="set_wind_source_path" getter="get_wind_source_path" default="NodePath(&quot;&quot;)">
 			The [Node3D] which is used to specify the direction and origin of an area-specific wind force. The direction is opposite to the z-axis of the [Node3D]'s local transform, and its origin is the origin of the [Node3D]'s local transform.
+			[b]Note:[/b] This wind force only applies to [SoftBody3D] nodes. Other physics bodies are currently not affected by wind.
 		</member>
 	</members>
 	<signals>
diff --git a/doc/classes/Array.xml b/doc/classes/Array.xml
index fd5ba57615..a72ac60536 100644
--- a/doc/classes/Array.xml
+++ b/doc/classes/Array.xml
@@ -40,6 +40,7 @@
 		[/codeblocks]
 		[b]Note:[/b] Arrays are always passed by reference. To get a copy of an array that can be modified independently of the original array, use [method duplicate].
 		[b]Note:[/b] Erasing elements while iterating over arrays is [b]not[/b] supported and will result in unpredictable behavior.
+		[b]Differences between packed arrays, typed arrays, and untyped arrays:[/b] Packed arrays are generally faster to iterate on and modify compared to a typed array of the same type (e.g. [PackedInt64Array] versus [code]Array[int][/code]). Also, packed arrays consume less memory. As a downside, packed arrays are less flexible as they don't offer as many convenience methods such as [method Array.map]. Typed arrays are in turn faster to iterate on and modify than untyped arrays.
 	</description>
 	<tutorials>
 	</tutorials>
@@ -57,7 +58,30 @@
 			<param index="2" name="class_name" type="StringName" />
 			<param index="3" name="script" type="Variant" />
 			<description>
-				Creates a typed array from the [param base] array.
+				Creates a typed array from the [param base] array. All arguments are required.
+				- [param type] is the built-in type as a [enum Variant.Type] constant, for example [constant TYPE_INT].
+				- [param class_name] is the [b]native[/b] class name, for example [Node]. If [param type] is not [constant TYPE_OBJECT], must be an empty string.
+				- [param script] is the associated script. Must be a [Script] instance or [code]null[/code].
+				Examples:
+				[codeblock]
+				class_name MyNode
+				extends Node
+
+				class MyClass:
+				    pass
+
+				func _ready():
+				    var a = Array([], TYPE_INT, &amp;"", null) # Array[int]
+				    var b = Array([], TYPE_OBJECT, &amp;"Node", null) # Array[Node]
+				    var c = Array([], TYPE_OBJECT, &amp;"Node", MyNode) # Array[MyNode]
+				    var d = Array([], TYPE_OBJECT, &amp;"RefCounted", MyClass) # Array[MyClass]
+				[/codeblock]
+				[b]Note:[/b] This constructor can be useful if you want to create a typed array on the fly, but you are not required to use it. In GDScript you can use a temporary variable with the static type you need and then pass it:
+				[codeblock]
+				func _ready():
+				    var a: Array[int] = []
+				    some_func(a)
+				[/codeblock]
 			</description>
 		</constructor>
 		<constructor name="Array">
@@ -323,19 +347,19 @@
 		<method name="get_typed_builtin" qualifiers="const">
 			<return type="int" />
 			<description>
-				Returns the [enum Variant.Type] constant for a typed array. If the [Array] is not typed, returns [constant TYPE_NIL].
+				Returns the built-in type of the typed array as a [enum Variant.Type] constant. If the array is not typed, returns [constant TYPE_NIL].
 			</description>
 		</method>
 		<method name="get_typed_class_name" qualifiers="const">
 			<return type="StringName" />
 			<description>
-				Returns a class name of a typed [Array] of type [constant TYPE_OBJECT].
+				Returns the [b]native[/b] class name of the typed array if the built-in type is [constant TYPE_OBJECT]. Otherwise, this method returns an empty string.
 			</description>
 		</method>
 		<method name="get_typed_script" qualifiers="const">
 			<return type="Variant" />
 			<description>
-				Returns the script associated with a typed array tied to a class name.
+				Returns the script associated with the typed array. This method returns a [Script] instance or [code]null[/code].
 			</description>
 		</method>
 		<method name="has" qualifiers="const" keywords="includes, contains">
diff --git a/doc/classes/AudioEffect.xml b/doc/classes/AudioEffect.xml
index 4fe47ac011..bd31603517 100644
--- a/doc/classes/AudioEffect.xml
+++ b/doc/classes/AudioEffect.xml
@@ -9,7 +9,7 @@
 	</description>
 	<tutorials>
 		<link title="Audio buses">$DOCS_URL/tutorials/audio/audio_buses.html</link>
-		<link title="Audio Mic Record Demo">https://godotengine.org/asset-library/asset/527</link>
+		<link title="Audio Microphone Record Demo">https://godotengine.org/asset-library/asset/2760</link>
 	</tutorials>
 	<methods>
 		<method name="_instantiate" qualifiers="virtual">
diff --git a/doc/classes/AudioEffectRecord.xml b/doc/classes/AudioEffectRecord.xml
index 5b43c5ebaa..ed0a0c5b08 100644
--- a/doc/classes/AudioEffectRecord.xml
+++ b/doc/classes/AudioEffectRecord.xml
@@ -11,7 +11,7 @@
 	</description>
 	<tutorials>
 		<link title="Recording with microphone">$DOCS_URL/tutorials/audio/recording_with_microphone.html</link>
-		<link title="Audio Mic Record Demo">https://godotengine.org/asset-library/asset/527</link>
+		<link title="Audio Microphone Record Demo">https://godotengine.org/asset-library/asset/2760</link>
 	</tutorials>
 	<methods>
 		<method name="get_recording" qualifiers="const">
diff --git a/doc/classes/AudioEffectReverb.xml b/doc/classes/AudioEffectReverb.xml
index 2c055f3825..4ac9672a8f 100644
--- a/doc/classes/AudioEffectReverb.xml
+++ b/doc/classes/AudioEffectReverb.xml
@@ -8,7 +8,7 @@
 	</description>
 	<tutorials>
 		<link title="Audio buses">$DOCS_URL/tutorials/audio/audio_buses.html</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<members>
 		<member name="damping" type="float" setter="set_damping" getter="get_damping" default="0.5">
diff --git a/doc/classes/AudioEffectSpectrumAnalyzer.xml b/doc/classes/AudioEffectSpectrumAnalyzer.xml
index eb70396433..d9312cc87d 100644
--- a/doc/classes/AudioEffectSpectrumAnalyzer.xml
+++ b/doc/classes/AudioEffectSpectrumAnalyzer.xml
@@ -8,7 +8,7 @@
 		See also [AudioStreamGenerator] for procedurally generating sounds.
 	</description>
 	<tutorials>
-		<link title="Audio Spectrum Demo">https://godotengine.org/asset-library/asset/528</link>
+		<link title="Audio Spectrum Visualizer Demo">https://godotengine.org/asset-library/asset/2762</link>
 		<link title="Godot 3.2 will get new audio features">https://godotengine.org/article/godot-32-will-get-new-audio-features</link>
 	</tutorials>
 	<members>
diff --git a/doc/classes/AudioServer.xml b/doc/classes/AudioServer.xml
index 993aa581dc..b3cf53367d 100644
--- a/doc/classes/AudioServer.xml
+++ b/doc/classes/AudioServer.xml
@@ -8,9 +8,9 @@
 	</description>
 	<tutorials>
 		<link title="Audio buses">$DOCS_URL/tutorials/audio/audio_buses.html</link>
-		<link title="Audio Device Changer Demo">https://godotengine.org/asset-library/asset/525</link>
-		<link title="Audio Mic Record Demo">https://godotengine.org/asset-library/asset/527</link>
-		<link title="Audio Spectrum Demo">https://godotengine.org/asset-library/asset/528</link>
+		<link title="Audio Device Changer Demo">https://godotengine.org/asset-library/asset/2758</link>
+		<link title="Audio Microphone Record Demo">https://godotengine.org/asset-library/asset/2760</link>
+		<link title="Audio Spectrum Visualizer Demo">https://godotengine.org/asset-library/asset/2762</link>
 	</tutorials>
 	<methods>
 		<method name="add_bus">
diff --git a/doc/classes/AudioStream.xml b/doc/classes/AudioStream.xml
index 9813c2f251..4abce3f1da 100644
--- a/doc/classes/AudioStream.xml
+++ b/doc/classes/AudioStream.xml
@@ -8,9 +8,9 @@
 	</description>
 	<tutorials>
 		<link title="Audio streams">$DOCS_URL/tutorials/audio/audio_streams.html</link>
-		<link title="Audio Generator Demo">https://godotengine.org/asset-library/asset/526</link>
-		<link title="Audio Mic Record Demo">https://godotengine.org/asset-library/asset/527</link>
-		<link title="Audio Spectrum Demo">https://godotengine.org/asset-library/asset/528</link>
+		<link title="Audio Generator Demo">https://godotengine.org/asset-library/asset/2759</link>
+		<link title="Audio Microphone Record Demo">https://godotengine.org/asset-library/asset/2760</link>
+		<link title="Audio Spectrum Visualizer Demo">https://godotengine.org/asset-library/asset/2762</link>
 	</tutorials>
 	<methods>
 		<method name="_get_beat_count" qualifiers="virtual const">
diff --git a/doc/classes/AudioStreamGenerator.xml b/doc/classes/AudioStreamGenerator.xml
index 9e91d5c450..f618e69631 100644
--- a/doc/classes/AudioStreamGenerator.xml
+++ b/doc/classes/AudioStreamGenerator.xml
@@ -63,7 +63,7 @@
 		[b]Note:[/b] Due to performance constraints, this class is best used from C# or from a compiled language via GDExtension. If you still want to use this class from GDScript, consider using a lower [member mix_rate] such as 11,025 Hz or 22,050 Hz.
 	</description>
 	<tutorials>
-		<link title="Audio Generator Demo">https://godotengine.org/asset-library/asset/526</link>
+		<link title="Audio Generator Demo">https://godotengine.org/asset-library/asset/2759</link>
 	</tutorials>
 	<members>
 		<member name="buffer_length" type="float" setter="set_buffer_length" getter="get_buffer_length" default="0.5">
diff --git a/doc/classes/AudioStreamGeneratorPlayback.xml b/doc/classes/AudioStreamGeneratorPlayback.xml
index 88c5cf6dbe..2f86eaf9e9 100644
--- a/doc/classes/AudioStreamGeneratorPlayback.xml
+++ b/doc/classes/AudioStreamGeneratorPlayback.xml
@@ -7,7 +7,7 @@
 		This class is meant to be used with [AudioStreamGenerator] to play back the generated audio in real-time.
 	</description>
 	<tutorials>
-		<link title="Audio Generator Demo">https://godotengine.org/asset-library/asset/526</link>
+		<link title="Audio Generator Demo">https://godotengine.org/asset-library/asset/2759</link>
 		<link title="Godot 3.2 will get new audio features">https://godotengine.org/article/godot-32-will-get-new-audio-features</link>
 	</tutorials>
 	<methods>
diff --git a/doc/classes/AudioStreamPlayback.xml b/doc/classes/AudioStreamPlayback.xml
index 460a7050c8..9f87b76a2b 100644
--- a/doc/classes/AudioStreamPlayback.xml
+++ b/doc/classes/AudioStreamPlayback.xml
@@ -7,7 +7,7 @@
 		Can play, loop, pause a scroll through audio. See [AudioStream] and [AudioStreamOggVorbis] for usage.
 	</description>
 	<tutorials>
-		<link title="Audio Generator Demo">https://godotengine.org/asset-library/asset/526</link>
+		<link title="Audio Generator Demo">https://godotengine.org/asset-library/asset/2759</link>
 	</tutorials>
 	<methods>
 		<method name="_get_loop_count" qualifiers="virtual const">
diff --git a/doc/classes/AudioStreamPlayer.xml b/doc/classes/AudioStreamPlayer.xml
index fbe2508da3..a7d0a10073 100644
--- a/doc/classes/AudioStreamPlayer.xml
+++ b/doc/classes/AudioStreamPlayer.xml
@@ -1,100 +1,104 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="AudioStreamPlayer" inherits="Node" keywords="sound, music, song" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../class.xsd">
 	<brief_description>
-		Plays back audio non-positionally.
+		A node for audio playback.
 	</brief_description>
 	<description>
-		Plays an audio stream non-positionally.
-		To play audio positionally, use [AudioStreamPlayer2D] or [AudioStreamPlayer3D] instead of [AudioStreamPlayer].
+		The [AudioStreamPlayer] node plays an audio stream non-positionally. It is ideal for user interfaces, menus, or background music.
+		To use this node, [member stream] needs to be set to a valid [AudioStream] resource. Playing more than one sound at the time is also supported, see [member max_polyphony].
+		If you need to play audio at a specific position, use [AudioStreamPlayer2D] or [AudioStreamPlayer3D] instead.
 	</description>
 	<tutorials>
 		<link title="Audio streams">$DOCS_URL/tutorials/audio/audio_streams.html</link>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
-		<link title="Audio Device Changer Demo">https://godotengine.org/asset-library/asset/525</link>
-		<link title="Audio Generator Demo">https://godotengine.org/asset-library/asset/526</link>
-		<link title="Audio Mic Record Demo">https://godotengine.org/asset-library/asset/527</link>
-		<link title="Audio Spectrum Demo">https://godotengine.org/asset-library/asset/528</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
+		<link title="Audio Device Changer Demo">https://godotengine.org/asset-library/asset/2758</link>
+		<link title="Audio Generator Demo">https://godotengine.org/asset-library/asset/2759</link>
+		<link title="Audio Microphone Record Demo">https://godotengine.org/asset-library/asset/2760</link>
+		<link title="Audio Spectrum Visualizer Demo">https://godotengine.org/asset-library/asset/2762</link>
 	</tutorials>
 	<methods>
 		<method name="get_playback_position">
 			<return type="float" />
 			<description>
-				Returns the position in the [AudioStream] in seconds.
+				Returns the position in the [AudioStream] of the latest sound, in seconds. Returns [code]0.0[/code] if no sounds are playing.
+				[b]Note:[/b] The position is not always accurate, as the [AudioServer] does not mix audio every processed frame. To get more accurate results, add [method AudioServer.get_time_since_last_mix] to the returned position.
 			</description>
 		</method>
 		<method name="get_stream_playback">
 			<return type="AudioStreamPlayback" />
 			<description>
-				Returns the [AudioStreamPlayback] object associated with this [AudioStreamPlayer].
+				Returns the latest [AudioStreamPlayback] of this node, usually the most recently created by [method play]. If no sounds are playing, this method fails and returns an empty playback.
 			</description>
 		</method>
 		<method name="has_stream_playback">
 			<return type="bool" />
 			<description>
-				Returns whether the [AudioStreamPlayer] can return the [AudioStreamPlayback] object or not.
+				Returns [code]true[/code] if any sound is active, even if [member stream_paused] is set to [code]true[/code]. See also [member playing] and [method get_stream_playback].
 			</description>
 		</method>
 		<method name="play">
 			<return type="void" />
 			<param index="0" name="from_position" type="float" default="0.0" />
 			<description>
-				Plays the audio from the given [param from_position], in seconds.
+				Plays a sound from the beginning, or the given [param from_position] in seconds.
 			</description>
 		</method>
 		<method name="seek">
 			<return type="void" />
 			<param index="0" name="to_position" type="float" />
 			<description>
-				Sets the position from which audio will be played, in seconds.
+				Restarts all sounds to be played from the given [param to_position], in seconds. Does nothing if no sounds are playing.
 			</description>
 		</method>
 		<method name="stop">
 			<return type="void" />
 			<description>
-				Stops the audio.
+				Stops all sounds from this node.
 			</description>
 		</method>
 	</methods>
 	<members>
 		<member name="autoplay" type="bool" setter="set_autoplay" getter="is_autoplay_enabled" default="false">
-			If [code]true[/code], audio plays when added to scene tree.
+			If [code]true[/code], this node calls [method play] when entering the tree.
 		</member>
 		<member name="bus" type="StringName" setter="set_bus" getter="get_bus" default="&amp;&quot;Master&quot;">
-			Bus on which this audio is playing.
-			[b]Note:[/b] When setting this property, keep in mind that no validation is performed to see if the given name matches an existing bus. This is because audio bus layouts might be loaded after this property is set. If this given name can't be resolved at runtime, it will fall back to [code]"Master"[/code].
+			The target bus name. All sounds from this node will be playing on this bus.
+			[b]Note:[/b] At runtime, if no bus with the given name exists, all sounds will fall back on [code]"Master"[/code]. See also [method AudioServer.get_bus_name].
 		</member>
 		<member name="max_polyphony" type="int" setter="set_max_polyphony" getter="get_max_polyphony" default="1">
-			The maximum number of sounds this node can play at the same time. Playing additional sounds after this value is reached will cut off the oldest sounds.
+			The maximum number of sounds this node can play at the same time. Calling [method play] after this value is reached will cut off the oldest sounds.
 		</member>
 		<member name="mix_target" type="int" setter="set_mix_target" getter="get_mix_target" enum="AudioStreamPlayer.MixTarget" default="0">
-			If the audio configuration has more than two speakers, this sets the target channels. See [enum MixTarget] constants.
+			The mix target channels, as one of the [enum MixTarget] constants. Has no effect when two speakers or less are detected (see [enum AudioServer.SpeakerMode]).
 		</member>
 		<member name="pitch_scale" type="float" setter="set_pitch_scale" getter="get_pitch_scale" default="1.0">
-			The pitch and the tempo of the audio, as a multiplier of the audio sample's sample rate.
+			The audio's pitch and tempo, as a multiplier of the [member stream]'s sample rate. A value of [code]2.0[/code] doubles the audio's pitch, while a value of [code]0.5[/code] halves the pitch.
 		</member>
 		<member name="playing" type="bool" setter="_set_playing" getter="is_playing" default="false">
-			If [code]true[/code], audio is playing.
+			If [code]true[/code], this node is playing sounds. Setting this property has the same effect as [method play] and [method stop].
 		</member>
 		<member name="stream" type="AudioStream" setter="set_stream" getter="get_stream">
-			The [AudioStream] object to be played.
+			The [AudioStream] resource to be played. Setting this property stops all currently playing sounds. If left empty, the [AudioStreamPlayer] does not work.
 		</member>
 		<member name="stream_paused" type="bool" setter="set_stream_paused" getter="get_stream_paused" default="false">
-			If [code]true[/code], the playback is paused. You can resume it by setting [member stream_paused] to [code]false[/code].
+			If [code]true[/code], the sounds are paused. Setting [member stream_paused] to [code]false[/code] resumes all sounds.
+			[b]Note:[/b] This property is automatically changed when exiting or entering the tree, or this node is paused (see [member Node.process_mode]).
 		</member>
 		<member name="volume_db" type="float" setter="set_volume_db" getter="get_volume_db" default="0.0">
-			Volume of sound, in dB.
+			Volume of sound, in decibel. This is an offset of the [member stream]'s volume.
+			[b]Note:[/b] To convert between decibel and linear energy (like most volume sliders do), use [method @GlobalScope.db_to_linear] and [method @GlobalScope.linear_to_db].
 		</member>
 	</members>
 	<signals>
 		<signal name="finished">
 			<description>
-				Emitted when the audio stops playing.
+				Emitted when a sound finishes playing without interruptions. This signal is [i]not[/i] emitted when calling [method stop], or when exiting the tree while sounds are playing.
 			</description>
 		</signal>
 	</signals>
 	<constants>
 		<constant name="MIX_TARGET_STEREO" value="0" enum="MixTarget">
-			The audio will be played only on the first channel.
+			The audio will be played only on the first channel. This is the default.
 		</constant>
 		<constant name="MIX_TARGET_SURROUND" value="1" enum="MixTarget">
 			The audio will be played on all surround channels.
diff --git a/doc/classes/Basis.xml b/doc/classes/Basis.xml
index 32077c0b89..338d9523fa 100644
--- a/doc/classes/Basis.xml
+++ b/doc/classes/Basis.xml
@@ -15,10 +15,10 @@
 		<link title="Math documentation index">$DOCS_URL/tutorials/math/index.html</link>
 		<link title="Matrices and transforms">$DOCS_URL/tutorials/math/matrices_and_transforms.html</link>
 		<link title="Using 3D transforms">$DOCS_URL/tutorials/3d/using_transforms.html</link>
-		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/584</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
-		<link title="2.5D Demo">https://godotengine.org/asset-library/asset/583</link>
+		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/2787</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
+		<link title="2.5D Game Demo">https://godotengine.org/asset-library/asset/2783</link>
 	</tutorials>
 	<constructors>
 		<constructor name="Basis">
diff --git a/doc/classes/BoxShape3D.xml b/doc/classes/BoxShape3D.xml
index 5190e6e759..ead5afd89c 100644
--- a/doc/classes/BoxShape3D.xml
+++ b/doc/classes/BoxShape3D.xml
@@ -8,9 +8,9 @@
 		[b]Performance:[/b] [BoxShape3D] is fast to check collisions against. It is faster than [CapsuleShape3D] and [CylinderShape3D], but slower than [SphereShape3D].
 	</description>
 	<tutorials>
-		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
-		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/126</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
+		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/2747</link>
+		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2739</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
 	</tutorials>
 	<members>
 		<member name="size" type="Vector3" setter="set_size" getter="get_size" default="Vector3(1, 1, 1)">
diff --git a/doc/classes/Button.xml b/doc/classes/Button.xml
index e5b47ffb89..30df4fd10d 100644
--- a/doc/classes/Button.xml
+++ b/doc/classes/Button.xml
@@ -36,8 +36,8 @@
 		[b]Note:[/b] Buttons do not interpret touch input and therefore don't support multitouch, since mouse emulation can only press one button at a given time. Use [TouchScreenButton] for buttons that trigger gameplay movement or actions.
 	</description>
 	<tutorials>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
-		<link title="OS Test Demo">https://godotengine.org/asset-library/asset/677</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
+		<link title="Operating System Testing Demo">https://godotengine.org/asset-library/asset/2789</link>
 	</tutorials>
 	<members>
 		<member name="alignment" type="int" setter="set_text_alignment" getter="get_text_alignment" enum="HorizontalAlignment" default="1">
diff --git a/doc/classes/Camera2D.xml b/doc/classes/Camera2D.xml
index 6319bfb3d1..da98e6c26a 100644
--- a/doc/classes/Camera2D.xml
+++ b/doc/classes/Camera2D.xml
@@ -10,9 +10,8 @@
 		Note that the [Camera2D] node's [code]position[/code] doesn't represent the actual position of the screen, which may differ due to applied smoothing or limits. You can use [method get_screen_center_position] to get the real position.
 	</description>
 	<tutorials>
-		<link title="2D Platformer Demo">https://godotengine.org/asset-library/asset/120</link>
-		<link title="2D Isometric Demo">https://godotengine.org/asset-library/asset/112</link>
-		<link title="2D HDR Demo">https://godotengine.org/asset-library/asset/110</link>
+		<link title="2D Platformer Demo">https://godotengine.org/asset-library/asset/2727</link>
+		<link title="2D Isometric Demo">https://godotengine.org/asset-library/asset/2718</link>
 	</tutorials>
 	<methods>
 		<method name="align">
diff --git a/doc/classes/Camera3D.xml b/doc/classes/Camera3D.xml
index 01890b471c..27194122d5 100644
--- a/doc/classes/Camera3D.xml
+++ b/doc/classes/Camera3D.xml
@@ -7,7 +7,7 @@
 		[Camera3D] is a special node that displays what is visible from its current location. Cameras register themselves in the nearest [Viewport] node (when ascending the tree). Only one camera can be active per viewport. If no viewport is available ascending the tree, the camera will register in the global viewport. In other words, a camera just provides 3D display capabilities to a [Viewport], and, without one, a scene registered in that [Viewport] (or higher viewports) can't be displayed.
 	</description>
 	<tutorials>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="clear_current">
diff --git a/doc/classes/CameraServer.xml b/doc/classes/CameraServer.xml
index 983bd6cd91..020b5d887b 100644
--- a/doc/classes/CameraServer.xml
+++ b/doc/classes/CameraServer.xml
@@ -6,7 +6,7 @@
 	<description>
 		The [CameraServer] keeps track of different cameras accessible in Godot. These are external cameras such as webcams or the cameras on your phone.
 		It is notably used to provide AR modules with a video feed from the camera.
-		[b]Note:[/b] This class is currently only implemented on macOS and iOS. On other platforms, no [CameraFeed]s will be available.
+		[b]Note:[/b] This class is currently only implemented on macOS and iOS. To get a [CameraFeed] on iOS, the camera plugin from [url=https://github.com/godotengine/godot-ios-plugins]godot-ios-plugins[/url] is required. On other platforms, no [CameraFeed]s will be available.
 	</description>
 	<tutorials>
 	</tutorials>
diff --git a/doc/classes/CanvasItem.xml b/doc/classes/CanvasItem.xml
index 72783bc5d6..1cfd44467f 100644
--- a/doc/classes/CanvasItem.xml
+++ b/doc/classes/CanvasItem.xml
@@ -12,7 +12,7 @@
 	<tutorials>
 		<link title="Viewport and canvas transforms">$DOCS_URL/tutorials/2d/2d_transforms.html</link>
 		<link title="Custom drawing in 2D">$DOCS_URL/tutorials/2d/custom_drawing_in_2d.html</link>
-		<link title="Audio Spectrum Demo">https://godotengine.org/asset-library/asset/528</link>
+		<link title="Audio Spectrum Visualizer Demo">https://godotengine.org/asset-library/asset/2762</link>
 	</tutorials>
 	<methods>
 		<method name="_draw" qualifiers="virtual">
@@ -606,14 +606,15 @@
 			[b]Note:[/b] For controls that inherit [Popup], the correct way to make them visible is to call one of the multiple [code]popup*()[/code] functions instead.
 		</member>
 		<member name="y_sort_enabled" type="bool" setter="set_y_sort_enabled" getter="is_y_sort_enabled" default="false">
-			If [code]true[/code], this [CanvasItem] and its [CanvasItem] child nodes are sorted according to the Y position. Nodes with a lower Y position are drawn before those with a higher Y position. If [code]false[/code], Y-sorting is disabled.
-			You can nest nodes with Y-sorting. Child Y-sorted nodes are sorted in the same space as the parent Y-sort. This feature allows you to organize a scene better or divide it into multiple ones without changing your scene tree.
+			If [code]true[/code], this and child [CanvasItem] nodes with a lower Y position are rendered in front of nodes with a higher Y position. If [code]false[/code], this and child [CanvasItem] nodes are rendered normally in scene tree order.
+			With Y-sorting enabled on a parent node ('A') but disabled on a child node ('B'), the child node ('B') is sorted but its children ('C1', 'C2', etc) render together on the same Y position as the child node 'B'. This allows you to organize the render order of a scene without changing the scene tree.
+			Nodes sort relative to each other only if they are on the same [member z_index].
 		</member>
 		<member name="z_as_relative" type="bool" setter="set_z_as_relative" getter="is_z_relative" default="true">
 			If [code]true[/code], the node's Z index is relative to its parent's Z index. If this node's Z index is 2 and its parent's effective Z index is 3, then this node's effective Z index will be 2 + 3 = 5.
 		</member>
 		<member name="z_index" type="int" setter="set_z_index" getter="get_z_index" default="0">
-			Z index. Controls the order in which the nodes render. A node with a higher Z index will display in front of others. Must be between [constant RenderingServer.CANVAS_ITEM_Z_MIN] and [constant RenderingServer.CANVAS_ITEM_Z_MAX] (inclusive).
+			Controls the order in which the nodes render. A node with a higher Z index will display in front of others. Must be between [constant RenderingServer.CANVAS_ITEM_Z_MIN] and [constant RenderingServer.CANVAS_ITEM_Z_MAX] (inclusive).
 			[b]Note:[/b] Changing the Z index of a [Control] only affects the drawing order, not the order in which input events are handled. This can be useful to implement certain UI animations, e.g. a menu where hovered items are scaled and should overlap others.
 		</member>
 	</members>
diff --git a/doc/classes/CanvasLayer.xml b/doc/classes/CanvasLayer.xml
index a91b003d79..597ec78089 100644
--- a/doc/classes/CanvasLayer.xml
+++ b/doc/classes/CanvasLayer.xml
@@ -12,7 +12,7 @@
 	<tutorials>
 		<link title="Viewport and canvas transforms">$DOCS_URL/tutorials/2d/2d_transforms.html</link>
 		<link title="Canvas layers">$DOCS_URL/tutorials/2d/canvas_layers.html</link>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
 	</tutorials>
 	<methods>
 		<method name="get_canvas" qualifiers="const">
diff --git a/doc/classes/CapsuleShape3D.xml b/doc/classes/CapsuleShape3D.xml
index 2c8c2cef9e..4c6b3a870f 100644
--- a/doc/classes/CapsuleShape3D.xml
+++ b/doc/classes/CapsuleShape3D.xml
@@ -8,7 +8,7 @@
 		[b]Performance:[/b] [CapsuleShape3D] is fast to check collisions against. It is faster than [CylinderShape3D], but slower than [SphereShape3D] and [BoxShape3D].
 	</description>
 	<tutorials>
-		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
+		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/2747</link>
 	</tutorials>
 	<members>
 		<member name="height" type="float" setter="set_height" getter="get_height" default="2.0">
diff --git a/doc/classes/CharacterBody2D.xml b/doc/classes/CharacterBody2D.xml
index b66a01a282..ede4d63cfc 100644
--- a/doc/classes/CharacterBody2D.xml
+++ b/doc/classes/CharacterBody2D.xml
@@ -10,8 +10,8 @@
 	<tutorials>
 		<link title="Kinematic character (2D)">$DOCS_URL/tutorials/physics/kinematic_character_2d.html</link>
 		<link title="Using CharacterBody2D">$DOCS_URL/tutorials/physics/using_character_body_2d.html</link>
-		<link title="2D Kinematic Character Demo">https://godotengine.org/asset-library/asset/113</link>
-		<link title="2D Platformer Demo">https://godotengine.org/asset-library/asset/120</link>
+		<link title="2D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2719</link>
+		<link title="2D Platformer Demo">https://godotengine.org/asset-library/asset/2727</link>
 	</tutorials>
 	<methods>
 		<method name="apply_floor_snap">
@@ -30,7 +30,8 @@
 		<method name="get_floor_normal" qualifiers="const">
 			<return type="Vector2" />
 			<description>
-				Returns the surface normal of the floor at the last collision point. Only valid after calling [method move_and_slide] and when [method is_on_floor] returns [code]true[/code].
+				Returns the collision normal of the floor at the last collision point. Only valid after calling [method move_and_slide] and when [method is_on_floor] returns [code]true[/code].
+				[b]Warning:[/b] The collision normal is not always the same as the surface normal.
 			</description>
 		</method>
 		<method name="get_last_motion" qualifiers="const">
@@ -94,7 +95,8 @@
 		<method name="get_wall_normal" qualifiers="const">
 			<return type="Vector2" />
 			<description>
-				Returns the surface normal of the wall at the last collision point. Only valid after calling [method move_and_slide] and when [method is_on_wall] returns [code]true[/code].
+				Returns the collision normal of the wall at the last collision point. Only valid after calling [method move_and_slide] and when [method is_on_wall] returns [code]true[/code].
+				[b]Warning:[/b] The collision normal is not always the same as the surface normal.
 			</description>
 		</method>
 		<method name="is_on_ceiling" qualifiers="const">
diff --git a/doc/classes/CharacterBody3D.xml b/doc/classes/CharacterBody3D.xml
index 2382c77a12..474adfc6ff 100644
--- a/doc/classes/CharacterBody3D.xml
+++ b/doc/classes/CharacterBody3D.xml
@@ -9,10 +9,10 @@
 	</description>
 	<tutorials>
 		<link title="Kinematic character (2D)">$DOCS_URL/tutorials/physics/kinematic_character_2d.html</link>
-		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/126</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2739</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="apply_floor_snap">
@@ -31,7 +31,8 @@
 		<method name="get_floor_normal" qualifiers="const">
 			<return type="Vector3" />
 			<description>
-				Returns the surface normal of the floor at the last collision point. Only valid after calling [method move_and_slide] and when [method is_on_floor] returns [code]true[/code].
+				Returns the collision normal of the floor at the last collision point. Only valid after calling [method move_and_slide] and when [method is_on_floor] returns [code]true[/code].
+				[b]Warning:[/b] The collision normal is not always the same as the surface normal.
 			</description>
 		</method>
 		<method name="get_last_motion" qualifiers="const">
@@ -86,7 +87,8 @@
 		<method name="get_wall_normal" qualifiers="const">
 			<return type="Vector3" />
 			<description>
-				Returns the surface normal of the wall at the last collision point. Only valid after calling [method move_and_slide] and when [method is_on_wall] returns [code]true[/code].
+				Returns the collision normal of the wall at the last collision point. Only valid after calling [method move_and_slide] and when [method is_on_wall] returns [code]true[/code].
+				[b]Warning:[/b] The collision normal is not always the same as the surface normal.
 			</description>
 		</method>
 		<method name="is_on_ceiling" qualifiers="const">
diff --git a/doc/classes/CollisionPolygon2D.xml b/doc/classes/CollisionPolygon2D.xml
index 29535ffd64..1805683de5 100644
--- a/doc/classes/CollisionPolygon2D.xml
+++ b/doc/classes/CollisionPolygon2D.xml
@@ -26,7 +26,6 @@
 		<member name="polygon" type="PackedVector2Array" setter="set_polygon" getter="get_polygon" default="PackedVector2Array()">
 			The polygon's list of vertices. Each point will be connected to the next, and the final point will be connected to the first.
 			[b]Note:[/b] The returned vertices are in the local coordinate space of the given [CollisionPolygon2D].
-			[b]Warning:[/b] The returned value is a clone of the [PackedVector2Array], not a reference.
 		</member>
 	</members>
 	<constants>
diff --git a/doc/classes/CollisionPolygon3D.xml b/doc/classes/CollisionPolygon3D.xml
index 16090c203e..4f5866c348 100644
--- a/doc/classes/CollisionPolygon3D.xml
+++ b/doc/classes/CollisionPolygon3D.xml
@@ -21,7 +21,6 @@
 		</member>
 		<member name="polygon" type="PackedVector2Array" setter="set_polygon" getter="get_polygon" default="PackedVector2Array()">
 			Array of vertices which define the 2D polygon in the local XY plane.
-			[b]Note:[/b] The returned value is a copy of the original. Methods which mutate the size or properties of the return value will not impact the original polygon. To change properties of the polygon, assign it to a temporary variable and make changes before reassigning the class property.
 		</member>
 	</members>
 </class>
diff --git a/doc/classes/CollisionShape2D.xml b/doc/classes/CollisionShape2D.xml
index 1320982376..dd04bf7b82 100644
--- a/doc/classes/CollisionShape2D.xml
+++ b/doc/classes/CollisionShape2D.xml
@@ -8,9 +8,9 @@
 	</description>
 	<tutorials>
 		<link title="Physics introduction">$DOCS_URL/tutorials/physics/physics_introduction.html</link>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
-		<link title="2D Pong Demo">https://godotengine.org/asset-library/asset/121</link>
-		<link title="2D Kinematic Character Demo">https://godotengine.org/asset-library/asset/113</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
+		<link title="2D Pong Demo">https://godotengine.org/asset-library/asset/2728</link>
+		<link title="2D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2719</link>
 	</tutorials>
 	<members>
 		<member name="debug_color" type="Color" setter="set_debug_color" getter="get_debug_color" default="Color(0, 0, 0, 1)">
diff --git a/doc/classes/CollisionShape3D.xml b/doc/classes/CollisionShape3D.xml
index f6c0c323f4..a4e0ed0b28 100644
--- a/doc/classes/CollisionShape3D.xml
+++ b/doc/classes/CollisionShape3D.xml
@@ -9,9 +9,9 @@
 	</description>
 	<tutorials>
 		<link title="Physics introduction">$DOCS_URL/tutorials/physics/physics_introduction.html</link>
-		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/126</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2739</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="make_convex_from_siblings">
diff --git a/doc/classes/Color.xml b/doc/classes/Color.xml
index 942d3bc80d..37beca5f81 100644
--- a/doc/classes/Color.xml
+++ b/doc/classes/Color.xml
@@ -10,9 +10,9 @@
 		[url=https://raw.githubusercontent.com/godotengine/godot-docs/master/img/color_constants.png]Color constants cheatsheet[/url]
 	</description>
 	<tutorials>
-		<link title="2D GD Paint Demo">https://godotengine.org/asset-library/asset/517</link>
-		<link title="Tween Demo">https://godotengine.org/asset-library/asset/146</link>
-		<link title="GUI Drag And Drop Demo">https://godotengine.org/asset-library/asset/133</link>
+		<link title="2D GD Paint Demo">https://godotengine.org/asset-library/asset/2768</link>
+		<link title="Tween Interpolation Demo">https://godotengine.org/asset-library/asset/2733</link>
+		<link title="GUI Drag And Drop Demo">https://godotengine.org/asset-library/asset/2767</link>
 	</tutorials>
 	<constructors>
 		<constructor name="Color">
diff --git a/doc/classes/ColorPicker.xml b/doc/classes/ColorPicker.xml
index 9aa0122ed2..cf26f917e1 100644
--- a/doc/classes/ColorPicker.xml
+++ b/doc/classes/ColorPicker.xml
@@ -8,7 +8,7 @@
 		[b]Note:[/b] This control is the color picker widget itself. You can use a [ColorPickerButton] instead if you need a button that brings up a [ColorPicker] in a popup.
 	</description>
 	<tutorials>
-		<link title="Tween Demo">https://godotengine.org/asset-library/asset/146</link>
+		<link title="Tween Interpolation Demo">https://godotengine.org/asset-library/asset/2733</link>
 	</tutorials>
 	<methods>
 		<method name="add_preset">
diff --git a/doc/classes/ColorPickerButton.xml b/doc/classes/ColorPickerButton.xml
index c53d61f036..bec2520397 100644
--- a/doc/classes/ColorPickerButton.xml
+++ b/doc/classes/ColorPickerButton.xml
@@ -9,8 +9,8 @@
 		[b]Note:[/b] By default, the button may not be wide enough for the color preview swatch to be visible. Make sure to set [member Control.custom_minimum_size] to a big enough value to give the button enough space.
 	</description>
 	<tutorials>
-		<link title="GUI Drag And Drop Demo">https://godotengine.org/asset-library/asset/133</link>
-		<link title="2D GD Paint Demo">https://godotengine.org/asset-library/asset/517</link>
+		<link title="2D GD Paint Demo">https://godotengine.org/asset-library/asset/2768</link>
+		<link title="GUI Drag And Drop Demo">https://godotengine.org/asset-library/asset/2767</link>
 	</tutorials>
 	<methods>
 		<method name="get_picker">
diff --git a/doc/classes/ColorRect.xml b/doc/classes/ColorRect.xml
index 413d51db72..6b2ddd748e 100644
--- a/doc/classes/ColorRect.xml
+++ b/doc/classes/ColorRect.xml
@@ -7,7 +7,7 @@
 		Displays a rectangle filled with a solid [member color]. If you need to display the border alone, consider using a [Panel] instead.
 	</description>
 	<tutorials>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
 	</tutorials>
 	<members>
 		<member name="color" type="Color" setter="set_color" getter="get_color" default="Color(1, 1, 1, 1)" keywords="colour">
diff --git a/doc/classes/ConcavePolygonShape3D.xml b/doc/classes/ConcavePolygonShape3D.xml
index 7e4df2073f..5c93325b63 100644
--- a/doc/classes/ConcavePolygonShape3D.xml
+++ b/doc/classes/ConcavePolygonShape3D.xml
@@ -11,7 +11,7 @@
 		[b]Performance:[/b] Due to its complexity, [ConcavePolygonShape3D] is the slowest 3D collision shape to check collisions against. Its use should generally be limited to level geometry. For convex geometry, [ConvexPolygonShape3D] should be used. For dynamic physics bodies that need concave collision, several [ConvexPolygonShape3D]s can be used to represent its collision by using convex decomposition; see [ConvexPolygonShape3D]'s documentation for instructions.
 	</description>
 	<tutorials>
-		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
+		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/2747</link>
 	</tutorials>
 	<methods>
 		<method name="get_faces" qualifiers="const">
diff --git a/doc/classes/ConfigFile.xml b/doc/classes/ConfigFile.xml
index e6b28ae98e..feeea339fe 100644
--- a/doc/classes/ConfigFile.xml
+++ b/doc/classes/ConfigFile.xml
@@ -5,7 +5,7 @@
 	</brief_description>
 	<description>
 		This helper class can be used to store [Variant] values on the filesystem using INI-style formatting. The stored values are identified by a section and a key:
-		[codeblock]
+		[codeblock lang=text]
 		[section]
 		some_key=42
 		string_example="Hello World3D!"
diff --git a/doc/classes/ConvexPolygonShape3D.xml b/doc/classes/ConvexPolygonShape3D.xml
index 280cadde78..98c3459289 100644
--- a/doc/classes/ConvexPolygonShape3D.xml
+++ b/doc/classes/ConvexPolygonShape3D.xml
@@ -10,7 +10,7 @@
 		[b]Performance:[/b] [ConvexPolygonShape3D] is faster to check collisions against compared to [ConcavePolygonShape3D], but it is slower than primitive collision shapes such as [SphereShape3D] and [BoxShape3D]. Its use should generally be limited to medium-sized objects that cannot have their collision accurately represented by primitive shapes.
 	</description>
 	<tutorials>
-		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
+		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/2747</link>
 	</tutorials>
 	<members>
 		<member name="points" type="PackedVector3Array" setter="set_points" getter="get_points" default="PackedVector3Array()">
diff --git a/doc/classes/CylinderShape3D.xml b/doc/classes/CylinderShape3D.xml
index 8bec199ab6..db98cac6e3 100644
--- a/doc/classes/CylinderShape3D.xml
+++ b/doc/classes/CylinderShape3D.xml
@@ -9,9 +9,9 @@
 		[b]Performance:[/b] [CylinderShape3D] is fast to check collisions against, but it is slower than [CapsuleShape3D], [BoxShape3D], and [SphereShape3D].
 	</description>
 	<tutorials>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
-		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
+		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/2747</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<members>
 		<member name="height" type="float" setter="set_height" getter="get_height" default="2.0">
diff --git a/doc/classes/Dictionary.xml b/doc/classes/Dictionary.xml
index 14ce7c894f..7f0fdddcdd 100644
--- a/doc/classes/Dictionary.xml
+++ b/doc/classes/Dictionary.xml
@@ -138,8 +138,8 @@
 	</description>
 	<tutorials>
 		<link title="GDScript basics: Dictionary">$DOCS_URL/tutorials/scripting/gdscript/gdscript_basics.html#dictionary</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
-		<link title="OS Test Demo">https://godotengine.org/asset-library/asset/677</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
+		<link title="Operating System Testing Demo">https://godotengine.org/asset-library/asset/2789</link>
 	</tutorials>
 	<constructors>
 		<constructor name="Dictionary">
diff --git a/doc/classes/DisplayServer.xml b/doc/classes/DisplayServer.xml
index 9eb5f9e334..42ca336a25 100644
--- a/doc/classes/DisplayServer.xml
+++ b/doc/classes/DisplayServer.xml
@@ -249,7 +249,7 @@
 			<param index="0" name="position" type="Vector2i" />
 			<description>
 				Returns the ID of the window at the specified screen [param position] (in pixels). On multi-monitor setups, the screen position is relative to the virtual desktop area. On multi-monitor setups with different screen resolutions or orientations, the origin may be located outside any display like this:
-				[codeblock]
+				[codeblock lang=text]
 				* (0, 0)        +-------+
 				                |       |
 				+-------------+ |       |
@@ -282,7 +282,7 @@
 				[b]Note:[/b] The [param callback] and [param key_callback] Callables need to accept exactly one Variant parameter, the parameter passed to the Callables will be the value passed to [param tag].
 				[b]Note:[/b] This method is implemented only on macOS.
 				[b]Supported system menu IDs:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				"_main" - Main menu (macOS).
 				"_dock" - Dock popup menu (macOS).
 				"_apple" - Apple menu (macOS, custom items added before "Services").
@@ -308,7 +308,7 @@
 				[b]Note:[/b] The [param callback] and [param key_callback] Callables need to accept exactly one Variant parameter, the parameter passed to the Callables will be the value passed to [param tag].
 				[b]Note:[/b] This method is implemented only on macOS.
 				[b]Supported system menu IDs:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				"_main" - Main menu (macOS).
 				"_dock" - Dock popup menu (macOS).
 				"_apple" - Apple menu (macOS, custom items added before "Services").
@@ -334,7 +334,7 @@
 				[b]Note:[/b] The [param callback] and [param key_callback] Callables need to accept exactly one Variant parameter, the parameter passed to the Callables will be the value passed to [param tag].
 				[b]Note:[/b] This method is implemented only on macOS.
 				[b]Supported system menu IDs:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				"_main" - Main menu (macOS).
 				"_dock" - Dock popup menu (macOS).
 				"_apple" - Apple menu (macOS, custom items added before "Services").
@@ -361,7 +361,7 @@
 				[b]Note:[/b] The [param callback] and [param key_callback] Callables need to accept exactly one Variant parameter, the parameter passed to the Callables will be the value passed to [param tag].
 				[b]Note:[/b] This method is implemented only on macOS.
 				[b]Supported system menu IDs:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				"_main" - Main menu (macOS).
 				"_dock" - Dock popup menu (macOS).
 				"_apple" - Apple menu (macOS, custom items added before "Services").
@@ -386,7 +386,7 @@
 				[b]Note:[/b] The [param callback] and [param key_callback] Callables need to accept exactly one Variant parameter, the parameter passed to the Callables will be the value passed to [param tag].
 				[b]Note:[/b] This method is implemented only on macOS.
 				[b]Supported system menu IDs:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				"_main" - Main menu (macOS).
 				"_dock" - Dock popup menu (macOS).
 				"_apple" - Apple menu (macOS, custom items added before "Services").
@@ -415,7 +415,7 @@
 				[b]Note:[/b] The [param callback] and [param key_callback] Callables need to accept exactly one Variant parameter, the parameter passed to the Callables will be the value passed to [param tag].
 				[b]Note:[/b] This method is implemented only on macOS.
 				[b]Supported system menu IDs:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				"_main" - Main menu (macOS).
 				"_dock" - Dock popup menu (macOS).
 				"_apple" - Apple menu (macOS, custom items added before "Services").
@@ -441,7 +441,7 @@
 				[b]Note:[/b] The [param callback] and [param key_callback] Callables need to accept exactly one Variant parameter, the parameter passed to the Callables will be the value passed to [param tag].
 				[b]Note:[/b] This method is implemented only on macOS.
 				[b]Supported system menu IDs:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				"_main" - Main menu (macOS).
 				"_dock" - Dock popup menu (macOS).
 				"_apple" - Apple menu (macOS, custom items added before "Services").
@@ -459,7 +459,7 @@
 				Returns index of the inserted item, it's not guaranteed to be the same as [param index] value.
 				[b]Note:[/b] This method is implemented only on macOS.
 				[b]Supported system menu IDs:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				"_main" - Main menu (macOS).
 				"_dock" - Dock popup menu (macOS).
 				"_apple" - Apple menu (macOS, custom items added before "Services").
@@ -479,7 +479,7 @@
 				Returns index of the inserted item, it's not guaranteed to be the same as [param index] value.
 				[b]Note:[/b] This method is implemented only on macOS.
 				[b]Supported system menu IDs:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				"_main" - Main menu (macOS).
 				"_dock" - Dock popup menu (macOS).
 				"_apple" - Apple menu (macOS, custom items added before "Services").
@@ -495,7 +495,7 @@
 				Removes all items from the global menu with ID [param menu_root].
 				[b]Note:[/b] This method is implemented only on macOS.
 				[b]Supported system menu IDs:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				"_main" - Main menu (macOS).
 				"_dock" - Dock popup menu (macOS).
 				"_apple" - Apple menu (macOS, custom items added before "Services").
@@ -1022,7 +1022,7 @@
 				Returns the dots per inch density of the specified screen. If [param screen] is [constant SCREEN_OF_MAIN_WINDOW] (the default value), a screen with the main window will be used.
 				[b]Note:[/b] On macOS, returned value is inaccurate if fractional display scaling mode is used.
 				[b]Note:[/b] On Android devices, the actual screen densities are grouped into six generalized densities:
-				[codeblock]
+				[codeblock lang=text]
 				   ldpi - 120 dpi
 				   mdpi - 160 dpi
 				   hdpi - 240 dpi
@@ -1072,7 +1072,7 @@
 			<param index="0" name="screen" type="int" default="-1" />
 			<description>
 				Returns the screen's top-left corner position in pixels. On multi-monitor setups, the screen position is relative to the virtual desktop area. On multi-monitor setups with different screen resolutions or orientations, the origin may be located outside any display like this:
-				[codeblock]
+				[codeblock lang=text]
 				* (0, 0)        +-------+
 				                |       |
 				+-------------+ |       |
@@ -1674,7 +1674,7 @@
 			<param index="1" name="window_id" type="int" default="0" />
 			<description>
 				Sets the position of the given window to [param position]. On multi-monitor setups, the screen position is relative to the virtual desktop area. On multi-monitor setups with different screen resolutions or orientations, the origin may be located outside any display like this:
-				[codeblock]
+				[codeblock lang=text]
 				* (0, 0)        +-------+
 				                |       |
 				+-------------+ |       |
diff --git a/doc/classes/EditorExportPlatformPC.xml b/doc/classes/EditorExportPlatformPC.xml
index 3c2a27deab..def14e5955 100644
--- a/doc/classes/EditorExportPlatformPC.xml
+++ b/doc/classes/EditorExportPlatformPC.xml
@@ -4,7 +4,10 @@
 		Base class for the desktop platform exporter (Windows and Linux/BSD).
 	</brief_description>
 	<description>
+		The base class for the desktop platform exporters. These include Windows and Linux/BSD, but not macOS. See the classes inheriting this one for more details.
 	</description>
 	<tutorials>
+		<link title="Exporting for Windows">$DOCS_URL/tutorials/export/exporting_for_windows.html</link>
+		<link title="Exporting for Linux">$DOCS_URL/tutorials/export/exporting_for_linux.html</link>
 	</tutorials>
 </class>
diff --git a/doc/classes/EditorPaths.xml b/doc/classes/EditorPaths.xml
index 94ddb37658..f57e728c93 100644
--- a/doc/classes/EditorPaths.xml
+++ b/doc/classes/EditorPaths.xml
@@ -17,7 +17,7 @@
 			<description>
 				Returns the absolute path to the user's cache folder. This folder should be used for temporary data that can be removed safely whenever the editor is closed (such as generated resource thumbnails).
 				[b]Default paths per platform:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				- Windows: %LOCALAPPDATA%\Godot\
 				- macOS: ~/Library/Caches/Godot/
 				- Linux: ~/.cache/godot/
@@ -29,7 +29,7 @@
 			<description>
 				Returns the absolute path to the user's configuration folder. This folder should be used for [i]persistent[/i] user configuration files.
 				[b]Default paths per platform:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				- Windows: %APPDATA%\Godot\                    (same as `get_data_dir()`)
 				- macOS: ~/Library/Application Support/Godot/  (same as `get_data_dir()`)
 				- Linux: ~/.config/godot/
@@ -41,7 +41,7 @@
 			<description>
 				Returns the absolute path to the user's data folder. This folder should be used for [i]persistent[/i] user data files such as installed export templates.
 				[b]Default paths per platform:[/b]
-				[codeblock]
+				[codeblock lang=text]
 				- Windows: %APPDATA%\Godot\                    (same as `get_config_dir()`)
 				- macOS: ~/Library/Application Support/Godot/  (same as `get_config_dir()`)
 				- Linux: ~/.local/share/godot/
diff --git a/doc/classes/EditorSettings.xml b/doc/classes/EditorSettings.xml
index 573171b7e1..87ca0536b8 100644
--- a/doc/classes/EditorSettings.xml
+++ b/doc/classes/EditorSettings.xml
@@ -830,7 +830,7 @@
 		<member name="interface/theme/icon_and_font_color" type="int" setter="" getter="">
 			The icon and font color scheme to use in the editor.
 			- [b]Auto[/b] determines the color scheme to use automatically based on [member interface/theme/base_color].
-			- [b]Dark[/b] makes fonts and icons dark (suitable for light themes). Icon colors are automatically converted by the editor following the set of rules defined in [url=https://github.com/godotengine/godot/blob/master/editor/editor_themes.cpp]this file[/url].
+			- [b]Dark[/b] makes fonts and icons dark (suitable for light themes). Icon colors are automatically converted by the editor following the set of rules defined in [url=https://github.com/godotengine/godot/blob/master/editor/themes/editor_theme_manager.cpp]this file[/url].
 			- [b]Light[/b] makes fonts and icons light (suitable for dark themes).
 		</member>
 		<member name="interface/theme/icon_saturation" type="float" setter="" getter="">
diff --git a/doc/classes/Environment.xml b/doc/classes/Environment.xml
index 189517c392..c28476aa3c 100644
--- a/doc/classes/Environment.xml
+++ b/doc/classes/Environment.xml
@@ -13,9 +13,8 @@
 	<tutorials>
 		<link title="Environment and post-processing">$DOCS_URL/tutorials/3d/environment_and_post_processing.html</link>
 		<link title="High dynamic range lighting">$DOCS_URL/tutorials/3d/high_dynamic_range.html</link>
-		<link title="3D Material Testers Demo">https://godotengine.org/asset-library/asset/123</link>
-		<link title="2D HDR Demo">https://godotengine.org/asset-library/asset/110</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="3D Material Testers Demo">https://godotengine.org/asset-library/asset/2742</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="get_glow_level" qualifiers="const">
diff --git a/doc/classes/FileAccess.xml b/doc/classes/FileAccess.xml
index fad6cbcc93..6b9c0bcc76 100644
--- a/doc/classes/FileAccess.xml
+++ b/doc/classes/FileAccess.xml
@@ -40,7 +40,7 @@
 	<tutorials>
 		<link title="File system">$DOCS_URL/tutorials/scripting/filesystem.html</link>
 		<link title="Runtime file loading and saving">$DOCS_URL/tutorials/io/runtime_file_loading_and_saving.html</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<methods>
 		<method name="close">
@@ -131,7 +131,7 @@
 				Returns the next value of the file in CSV (Comma-Separated Values) format. You can pass a different delimiter [param delim] to use other than the default [code]","[/code] (comma). This delimiter must be one-character long, and cannot be a double quotation mark.
 				Text is interpreted as being UTF-8 encoded. Text values must be enclosed in double quotes if they include the delimiter character. Double quotes within a text value can be escaped by doubling their occurrence.
 				For example, the following CSV lines are valid and will be properly parsed as two strings each:
-				[codeblock]
+				[codeblock lang=text]
 				Alice,"Hello, Bob!"
 				Bob,Alice! What a surprise!
 				Alice,"I thought you'd reply with ""Hello, world""."
diff --git a/doc/classes/GPUParticles2D.xml b/doc/classes/GPUParticles2D.xml
index f4ba305f8b..a7d89e8596 100644
--- a/doc/classes/GPUParticles2D.xml
+++ b/doc/classes/GPUParticles2D.xml
@@ -10,8 +10,8 @@
 	</description>
 	<tutorials>
 		<link title="Particle systems (2D)">$DOCS_URL/tutorials/2d/particle_systems_2d.html</link>
-		<link title="2D Particles Demo">https://godotengine.org/asset-library/asset/118</link>
-		<link title="2D Dodge The Creeps Demo (uses GPUParticles2D for the trail behind the player)">https://godotengine.org/asset-library/asset/515</link>
+		<link title="2D Particles Demo">https://godotengine.org/asset-library/asset/2724</link>
+		<link title="2D Dodge The Creeps Demo (uses GPUParticles2D for the trail behind the player)">https://godotengine.org/asset-library/asset/2712</link>
 	</tutorials>
 	<methods>
 		<method name="capture_rect" qualifiers="const">
@@ -43,7 +43,8 @@
 		<method name="restart">
 			<return type="void" />
 			<description>
-				Restarts all the existing particles.
+				Restarts the particle emission cycle, clearing existing particles. To avoid particles vanishing from the viewport, wait for the [signal finished] signal before calling.			
+				[b]Note:[/b] The [signal finished] signal is only emitted by [member one_shot] emitters.
 			</description>
 		</method>
 	</methods>
@@ -64,7 +65,9 @@
 			Particle draw order. Uses [enum DrawOrder] values.
 		</member>
 		<member name="emitting" type="bool" setter="set_emitting" getter="is_emitting" default="true">
-			If [code]true[/code], particles are being emitted. [member emitting] can be used to start and stop particles from emitting. However, if [member one_shot] is [code]true[/code] setting [member emitting] to [code]true[/code] will not restart the emission cycle until after all active particles finish processing. You can use the [signal finished] signal to be notified once all active particles finish processing.
+			If [code]true[/code], particles are being emitted. [member emitting] can be used to start and stop particles from emitting. However, if [member one_shot] is [code]true[/code] setting [member emitting] to [code]true[/code] will not restart the emission cycle unless all active particles have finished processing. Use the [signal finished] signal to be notified once all active particles finish processing.
+			[b]Note:[/b] For [member one_shot] emitters, due to the particles being computed on the GPU, there may be a short period after receiving the [signal finished] signal during which setting this to [code]true[/code] will not restart the emission cycle.
+			[b]Tip:[/b] If your [member one_shot] emitter needs to immediately restart emitting particles once [signal finished] signal is received, consider calling [method restart] instead of setting [member emitting].
 		</member>
 		<member name="explosiveness" type="float" setter="set_explosiveness_ratio" getter="get_explosiveness_ratio" default="0.0">
 			How rapidly particles in an emission cycle are emitted. If greater than [code]0[/code], there will be a gap in emissions before the next cycle begins.
@@ -132,8 +135,9 @@
 	<signals>
 		<signal name="finished">
 			<description>
-				Emitted when all active particles have finished processing. When [member one_shot] is disabled, particles will process continuously, so this is never emitted.
-				[b]Note:[/b] Due to the particles being computed on the GPU there might be a delay before the signal gets emitted.
+				Emitted when all active particles have finished processing. To immediately restart the emission cycle, call [method restart].
+				Never emitted when [member one_shot] is disabled, as particles will be emitted and processed continuously.
+				[b]Note:[/b] For [member one_shot] emitters, due to the particles being computed on the GPU, there may be a short period after receiving the signal during which setting [member emitting] to [code]true[/code] will not restart the emission cycle. This delay is avoided by instead calling [method restart].
 			</description>
 		</signal>
 	</signals>
diff --git a/doc/classes/GPUParticles3D.xml b/doc/classes/GPUParticles3D.xml
index d1903b85cd..61a3b467f1 100644
--- a/doc/classes/GPUParticles3D.xml
+++ b/doc/classes/GPUParticles3D.xml
@@ -10,7 +10,7 @@
 	<tutorials>
 		<link title="Particle systems (3D)">$DOCS_URL/tutorials/3d/particles/index.html</link>
 		<link title="Controlling thousands of fish with Particles">$DOCS_URL/tutorials/performance/vertex_animation/controlling_thousands_of_fish.html</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="capture_aabb" qualifiers="const">
@@ -48,7 +48,8 @@
 		<method name="restart">
 			<return type="void" />
 			<description>
-				Restarts the particle emission, clearing existing particles.
+				Restarts the particle emission cycle, clearing existing particles. To avoid particles vanishing from the viewport, wait for the [signal finished] signal before calling.
+				[b]Note:[/b] The [signal finished] signal is only emitted by [member one_shot] emitters.
 			</description>
 		</method>
 		<method name="set_draw_pass_mesh">
@@ -95,7 +96,9 @@
 		<member name="draw_skin" type="Skin" setter="set_skin" getter="get_skin">
 		</member>
 		<member name="emitting" type="bool" setter="set_emitting" getter="is_emitting" default="true">
-			If [code]true[/code], particles are being emitted. [member emitting] can be used to start and stop particles from emitting. However, if [member one_shot] is [code]true[/code] setting [member emitting] to [code]true[/code] will not restart the emission cycle until after all active particles finish processing. You can use the [signal finished] signal to be notified once all active particles finish processing.
+			If [code]true[/code], particles are being emitted. [member emitting] can be used to start and stop particles from emitting. However, if [member one_shot] is [code]true[/code] setting [member emitting] to [code]true[/code] will not restart the emission cycle unless all active particles have finished processing. Use the [signal finished] signal to be notified once all active particles finish processing.
+			[b]Note:[/b] For [member one_shot] emitters, due to the particles being computed on the GPU, there may be a short period after receiving the [signal finished] signal during which setting this to [code]true[/code] will not restart the emission cycle.
+			[b]Tip:[/b] If your [member one_shot] emitter needs to immediately restart emitting particles once [signal finished] signal is received, consider calling [method restart] instead of setting [member emitting].
 		</member>
 		<member name="explosiveness" type="float" setter="set_explosiveness_ratio" getter="get_explosiveness_ratio" default="0.0">
 			Time ratio between each emission. If [code]0[/code], particles are emitted continuously. If [code]1[/code], all particles are emitted simultaneously.
@@ -157,8 +160,9 @@
 	<signals>
 		<signal name="finished">
 			<description>
-				Emitted when all active particles have finished processing. When [member one_shot] is disabled, particles will process continuously, so this is never emitted.
-				[b]Note:[/b] Due to the particles being computed on the GPU there might be a delay before the signal gets emitted.
+				Emitted when all active particles have finished processing. To immediately emit new particles, call [method restart].
+				Never emitted when [member one_shot] is disabled, as particles will be emitted and processed continuously.
+				[b]Note:[/b] For [member one_shot] emitters, due to the particles being computed on the GPU, there may be a short period after receiving the signal during which setting [member emitting] to [code]true[/code] will not restart the emission cycle. This delay is avoided by instead calling [method restart].
 			</description>
 		</signal>
 	</signals>
diff --git a/doc/classes/Gradient.xml b/doc/classes/Gradient.xml
index c7fa1f40e0..c85b3acafa 100644
--- a/doc/classes/Gradient.xml
+++ b/doc/classes/Gradient.xml
@@ -78,8 +78,8 @@
 	</methods>
 	<members>
 		<member name="colors" type="PackedColorArray" setter="set_colors" getter="get_colors" default="PackedColorArray(0, 0, 0, 1, 1, 1, 1, 1)">
-			Gradient's colors returned as a [PackedColorArray].
-			[b]Note:[/b] This property returns a copy, modifying the return value does not update the gradient. To update the gradient use [method set_color] method (for updating colors individually) or assign to this property directly (for bulk-updating all colors at once).
+			Gradient's colors as a [PackedColorArray].
+			[b]Note:[/b] Setting this property updates all colors at once. To update any color individually use [method set_color].
 		</member>
 		<member name="interpolation_color_space" type="int" setter="set_interpolation_color_space" getter="get_interpolation_color_space" enum="Gradient.ColorSpace" default="0">
 			The color space used to interpolate between points of the gradient. It does not affect the returned colors, which will always be in sRGB space. See [enum ColorSpace] for available modes.
@@ -89,8 +89,8 @@
 			The algorithm used to interpolate between points of the gradient. See [enum InterpolationMode] for available modes.
 		</member>
 		<member name="offsets" type="PackedFloat32Array" setter="set_offsets" getter="get_offsets" default="PackedFloat32Array(0, 1)">
-			Gradient's offsets returned as a [PackedFloat32Array].
-			[b]Note:[/b] This property returns a copy, modifying the return value does not update the gradient. To update the gradient use [method set_offset] method (for updating offsets individually) or assign to this property directly (for bulk-updating all offsets at once).
+			Gradient's offsets as a [PackedFloat32Array].
+			[b]Note:[/b] Setting this property updates all offsets at once. To update any offset individually use [method set_offset].
 		</member>
 	</members>
 	<constants>
diff --git a/doc/classes/GridContainer.xml b/doc/classes/GridContainer.xml
index d69f4a10d2..76f789a058 100644
--- a/doc/classes/GridContainer.xml
+++ b/doc/classes/GridContainer.xml
@@ -9,7 +9,7 @@
 	</description>
 	<tutorials>
 		<link title="Using Containers">$DOCS_URL/tutorials/ui/gui_containers.html</link>
-		<link title="OS Test Demo">https://godotengine.org/asset-library/asset/677</link>
+		<link title="Operating System Testing Demo">https://godotengine.org/asset-library/asset/2789</link>
 	</tutorials>
 	<members>
 		<member name="columns" type="int" setter="set_columns" getter="get_columns" default="1">
diff --git a/doc/classes/Input.xml b/doc/classes/Input.xml
index e622a6bce3..119ecb7f0e 100644
--- a/doc/classes/Input.xml
+++ b/doc/classes/Input.xml
@@ -9,8 +9,8 @@
 	</description>
 	<tutorials>
 		<link title="Inputs documentation index">$DOCS_URL/tutorials/inputs/index.html</link>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<methods>
 		<method name="action_press">
diff --git a/doc/classes/InputEvent.xml b/doc/classes/InputEvent.xml
index 96a4612466..6f2e6aac20 100644
--- a/doc/classes/InputEvent.xml
+++ b/doc/classes/InputEvent.xml
@@ -9,8 +9,8 @@
 	<tutorials>
 		<link title="Using InputEvent">$DOCS_URL/tutorials/inputs/inputevent.html</link>
 		<link title="Viewport and canvas transforms">$DOCS_URL/tutorials/2d/2d_transforms.html</link>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<methods>
 		<method name="accumulate">
diff --git a/doc/classes/InputEventAction.xml b/doc/classes/InputEventAction.xml
index 1fe4cfcd79..4715f9fe95 100644
--- a/doc/classes/InputEventAction.xml
+++ b/doc/classes/InputEventAction.xml
@@ -9,8 +9,8 @@
 	</description>
 	<tutorials>
 		<link title="Using InputEvent: Actions">$DOCS_URL/tutorials/inputs/inputevent.html#actions</link>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<members>
 		<member name="action" type="StringName" setter="set_action" getter="get_action" default="&amp;&quot;&quot;">
diff --git a/doc/classes/InputEventKey.xml b/doc/classes/InputEventKey.xml
index 7b6fc3f216..dc4872ba03 100644
--- a/doc/classes/InputEventKey.xml
+++ b/doc/classes/InputEventKey.xml
@@ -66,7 +66,7 @@
 			Represents the localized label printed on the key in the current keyboard layout, which corresponds to one of the [enum Key] constants or any valid Unicode character.
 			For keyboard layouts with a single label on the key, it is equivalent to [member keycode].
 			To get a human-readable representation of the [InputEventKey], use [code]OS.get_keycode_string(event.key_label)[/code] where [code]event[/code] is the [InputEventKey].
-			[codeblock]
+			[codeblock lang=text]
 			    +-----+ +-----+
 			    | Q   | | Q   | - "Q" - keycode
 			    |   Й | |  ض | - "Й" and "ض" - key_label
@@ -76,7 +76,7 @@
 		<member name="keycode" type="int" setter="set_keycode" getter="get_keycode" enum="Key" default="0">
 			Latin label printed on the key in the current keyboard layout, which corresponds to one of the [enum Key] constants.
 			To get a human-readable representation of the [InputEventKey], use [code]OS.get_keycode_string(event.keycode)[/code] where [code]event[/code] is the [InputEventKey].
-			[codeblock]
+			[codeblock lang=text]
 			    +-----+ +-----+
 			    | Q   | | Q   | - "Q" - keycode
 			    |   Й | |  ض | - "Й" and "ض" - key_label
diff --git a/doc/classes/InputEventMouseMotion.xml b/doc/classes/InputEventMouseMotion.xml
index e6ec674975..98a0221fe8 100644
--- a/doc/classes/InputEventMouseMotion.xml
+++ b/doc/classes/InputEventMouseMotion.xml
@@ -10,7 +10,7 @@
 	<tutorials>
 		<link title="Using InputEvent">$DOCS_URL/tutorials/inputs/inputevent.html</link>
 		<link title="Mouse and input coordinates">$DOCS_URL/tutorials/inputs/mouse_and_input_coordinates.html</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<members>
 		<member name="pen_inverted" type="bool" setter="set_pen_inverted" getter="get_pen_inverted" default="false">
diff --git a/doc/classes/JSONRPC.xml b/doc/classes/JSONRPC.xml
index 348688f7f8..e5ee93cb93 100644
--- a/doc/classes/JSONRPC.xml
+++ b/doc/classes/JSONRPC.xml
@@ -79,15 +79,19 @@
 	</methods>
 	<constants>
 		<constant name="PARSE_ERROR" value="-32700" enum="ErrorCode">
+			The request could not be parsed as it was not valid by JSON standard ([method JSON.parse] failed).
 		</constant>
 		<constant name="INVALID_REQUEST" value="-32600" enum="ErrorCode">
+			A method call was requested but the request's format is not valid.
 		</constant>
 		<constant name="METHOD_NOT_FOUND" value="-32601" enum="ErrorCode">
 			A method call was requested but no function of that name existed in the JSONRPC subclass.
 		</constant>
 		<constant name="INVALID_PARAMS" value="-32602" enum="ErrorCode">
+			A method call was requested but the given method parameters are not valid. Not used by the built-in JSONRPC.
 		</constant>
 		<constant name="INTERNAL_ERROR" value="-32603" enum="ErrorCode">
+			An internal error occurred while processing the request. Not used by the built-in JSONRPC.
 		</constant>
 	</constants>
 </class>
diff --git a/doc/classes/JavaClass.xml b/doc/classes/JavaClass.xml
index 541f23013d..ecfcaa8781 100644
--- a/doc/classes/JavaClass.xml
+++ b/doc/classes/JavaClass.xml
@@ -1,8 +1,12 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="JavaClass" inherits="RefCounted" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../class.xsd">
 	<brief_description>
+		Represents an object from the Java Native Interface.
 	</brief_description>
 	<description>
+		Represents an object from the Java Native Interface. It is returned from [method JavaClassWrapper.wrap].
+		[b]Note:[/b] This class only works on Android. For any other build, this class does nothing.
+		[b]Note:[/b] This class is not to be confused with [JavaScriptObject].
 	</description>
 	<tutorials>
 	</tutorials>
diff --git a/doc/classes/JavaClassWrapper.xml b/doc/classes/JavaClassWrapper.xml
index e197b1e97e..01c3392b04 100644
--- a/doc/classes/JavaClassWrapper.xml
+++ b/doc/classes/JavaClassWrapper.xml
@@ -1,8 +1,11 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="JavaClassWrapper" inherits="Object" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../class.xsd">
 	<brief_description>
+		Provides access to the Java Native Interface.
 	</brief_description>
 	<description>
+		The JavaClassWrapper singleton provides a way for the Godot application to send and receive data through the [url=https://developer.android.com/training/articles/perf-jni]Java Native Interface[/url] (JNI).
+		[b]Note:[/b] This singleton is only available in Android builds.
 	</description>
 	<tutorials>
 	</tutorials>
@@ -11,6 +14,8 @@
 			<return type="JavaClass" />
 			<param index="0" name="name" type="String" />
 			<description>
+				Wraps a class defined in Java, and returns it as a [JavaClass] [Object] type that Godot can interact with.
+				[b]Note:[/b] This method only works on Android. On every other platform, this method does nothing and returns an empty [JavaClass].
 			</description>
 		</method>
 	</methods>
diff --git a/doc/classes/Joint3D.xml b/doc/classes/Joint3D.xml
index 9e0b753701..ea0dda881a 100644
--- a/doc/classes/Joint3D.xml
+++ b/doc/classes/Joint3D.xml
@@ -7,7 +7,7 @@
 		Abstract base class for all joints in 3D physics. 3D joints bind together two physics bodies and apply a constraint.
 	</description>
 	<tutorials>
-		<link title="3D Truck Town Demo">https://godotengine.org/asset-library/asset/524</link>
+		<link title="3D Truck Town Demo">https://godotengine.org/asset-library/asset/2752</link>
 	</tutorials>
 	<methods>
 		<method name="get_rid" qualifiers="const">
diff --git a/doc/classes/Label.xml b/doc/classes/Label.xml
index f39f5616f0..8acd05cbd1 100644
--- a/doc/classes/Label.xml
+++ b/doc/classes/Label.xml
@@ -7,7 +7,7 @@
 		A control for displaying plain text. It gives you control over the horizontal and vertical alignment and can wrap the text inside the node's bounding rectangle. It doesn't support bold, italics, or other rich text formatting. For that, use [RichTextLabel] instead.
 	</description>
 	<tutorials>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
 	</tutorials>
 	<methods>
 		<method name="get_character_bounds" qualifiers="const">
diff --git a/doc/classes/Light3D.xml b/doc/classes/Light3D.xml
index bffa20bf23..c1fc49cf9f 100644
--- a/doc/classes/Light3D.xml
+++ b/doc/classes/Light3D.xml
@@ -9,7 +9,7 @@
 	<tutorials>
 		<link title="3D lights and shadows">$DOCS_URL/tutorials/3d/lights_and_shadows.html</link>
 		<link title="Faking global illumination">$DOCS_URL/tutorials/3d/global_illumination/faking_global_illumination.html</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="get_correlated_color" qualifiers="const">
diff --git a/doc/classes/Line2D.xml b/doc/classes/Line2D.xml
index 283d847a93..a553e79746 100644
--- a/doc/classes/Line2D.xml
+++ b/doc/classes/Line2D.xml
@@ -9,8 +9,8 @@
 		[b]Note:[/b] [Line2D] is drawn using a 2D mesh.
 	</description>
 	<tutorials>
-		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/584</link>
-		<link title="2.5D Demo">https://godotengine.org/asset-library/asset/583</link>
+		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/2787</link>
+		<link title="2.5D Game Demo">https://godotengine.org/asset-library/asset/2783</link>
 	</tutorials>
 	<methods>
 		<method name="add_point">
diff --git a/doc/classes/Material.xml b/doc/classes/Material.xml
index 87fa3fd676..4a73bc2271 100644
--- a/doc/classes/Material.xml
+++ b/doc/classes/Material.xml
@@ -8,8 +8,8 @@
 		Importantly, you can inherit from [Material] to create your own custom material type in script or in GDExtension.
 	</description>
 	<tutorials>
-		<link title="3D Material Testers Demo">https://godotengine.org/asset-library/asset/123</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="3D Material Testers Demo">https://godotengine.org/asset-library/asset/2742</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="_can_do_next_pass" qualifiers="virtual const">
diff --git a/doc/classes/Mesh.xml b/doc/classes/Mesh.xml
index 3f8ed91ad7..966e870940 100644
--- a/doc/classes/Mesh.xml
+++ b/doc/classes/Mesh.xml
@@ -7,10 +7,10 @@
 		Mesh is a type of [Resource] that contains vertex array-based geometry, divided in [i]surfaces[/i]. Each surface contains a completely separate array and a material used to draw it. Design wise, a mesh with multiple surfaces is preferred to a single surface, because objects created in 3D editing software commonly contain multiple materials.
 	</description>
 	<tutorials>
-		<link title="3D Material Testers Demo">https://godotengine.org/asset-library/asset/123</link>
-		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/126</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="3D Material Testers Demo">https://godotengine.org/asset-library/asset/2742</link>
+		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2739</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="_get_aabb" qualifiers="virtual const">
diff --git a/doc/classes/MeshInstance3D.xml b/doc/classes/MeshInstance3D.xml
index 4645435f55..e5187cf7a1 100644
--- a/doc/classes/MeshInstance3D.xml
+++ b/doc/classes/MeshInstance3D.xml
@@ -7,10 +7,10 @@
 		MeshInstance3D is a node that takes a [Mesh] resource and adds it to the current scenario by creating an instance of it. This is the class most often used render 3D geometry and can be used to instance a single [Mesh] in many places. This allows reusing geometry, which can save on resources. When a [Mesh] has to be instantiated more than thousands of times at close proximity, consider using a [MultiMesh] in a [MultiMeshInstance3D] instead.
 	</description>
 	<tutorials>
-		<link title="3D Material Testers Demo">https://godotengine.org/asset-library/asset/123</link>
-		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/126</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="3D Material Testers Demo">https://godotengine.org/asset-library/asset/2742</link>
+		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2739</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="create_convex_collision">
@@ -70,6 +70,12 @@
 				Returns the value of the blend shape at the given [param blend_shape_idx]. Returns [code]0.0[/code] and produces an error if [member mesh] is [code]null[/code] or doesn't have a blend shape at that index.
 			</description>
 		</method>
+		<method name="get_skin_reference" qualifiers="const">
+			<return type="SkinReference" />
+			<description>
+				Returns the internal [SkinReference] containing the skeleton's [RID] attached to this RID. See also [method Resource.get_rid], [method SkinReference.get_skeleton], and [method RenderingServer.instance_attach_skeleton].
+			</description>
+		</method>
 		<method name="get_surface_override_material" qualifiers="const">
 			<return type="Material" />
 			<param index="0" name="surface" type="int" />
diff --git a/doc/classes/MeshLibrary.xml b/doc/classes/MeshLibrary.xml
index f7099e569f..c5e8d8cbe7 100644
--- a/doc/classes/MeshLibrary.xml
+++ b/doc/classes/MeshLibrary.xml
@@ -7,8 +7,8 @@
 		A library of meshes. Contains a list of [Mesh] resources, each with a name and ID. Each item can also include collision and navigation shapes. This resource is used in [GridMap].
 	</description>
 	<tutorials>
-		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/126</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
+		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2739</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
 	</tutorials>
 	<methods>
 		<method name="clear">
diff --git a/doc/classes/MultiplayerPeer.xml b/doc/classes/MultiplayerPeer.xml
index 04fd282457..edb2c39e24 100644
--- a/doc/classes/MultiplayerPeer.xml
+++ b/doc/classes/MultiplayerPeer.xml
@@ -10,7 +10,6 @@
 	</description>
 	<tutorials>
 		<link title="High-level multiplayer">$DOCS_URL/tutorials/networking/high_level_multiplayer.html</link>
-		<link title="WebRTC Signaling Demo">https://godotengine.org/asset-library/asset/537</link>
 	</tutorials>
 	<methods>
 		<method name="close">
diff --git a/doc/classes/NavigationMesh.xml b/doc/classes/NavigationMesh.xml
index 42ef354bc8..8be8a89543 100644
--- a/doc/classes/NavigationMesh.xml
+++ b/doc/classes/NavigationMesh.xml
@@ -8,7 +8,7 @@
 	</description>
 	<tutorials>
 		<link title="Using NavigationMeshes">$DOCS_URL/tutorials/navigation/navigation_using_navigationmeshes.html</link>
-		<link title="3D Navmesh Demo">https://godotengine.org/asset-library/asset/124</link>
+		<link title="3D Navigation Demo">https://godotengine.org/asset-library/asset/2743</link>
 	</tutorials>
 	<methods>
 		<method name="add_polygon">
diff --git a/doc/classes/NavigationPolygon.xml b/doc/classes/NavigationPolygon.xml
index 9c4e8169b0..eebdc817a7 100644
--- a/doc/classes/NavigationPolygon.xml
+++ b/doc/classes/NavigationPolygon.xml
@@ -43,8 +43,8 @@
 		[/codeblocks]
 	</description>
 	<tutorials>
-		<link title="2D Navigation Demo">https://godotengine.org/asset-library/asset/117</link>
 		<link title="Using NavigationMeshes">$DOCS_URL/tutorials/navigation/navigation_using_navigationmeshes.html</link>
+		<link title="Navigation Polygon 2D Demo">https://godotengine.org/asset-library/asset/2722</link>
 	</tutorials>
 	<methods>
 		<method name="add_outline">
diff --git a/doc/classes/NavigationServer2D.xml b/doc/classes/NavigationServer2D.xml
index 91d69edf29..baef7dc02d 100644
--- a/doc/classes/NavigationServer2D.xml
+++ b/doc/classes/NavigationServer2D.xml
@@ -14,8 +14,8 @@
 		This server keeps tracks of any call and executes them during the sync phase. This means that you can request any change to the map, using any thread, without worrying.
 	</description>
 	<tutorials>
-		<link title="2D Navigation Demo">https://godotengine.org/asset-library/asset/117</link>
 		<link title="Using NavigationServer">$DOCS_URL/tutorials/navigation/navigation_using_navigationservers.html</link>
+		<link title="Navigation Polygon 2D Demo">https://godotengine.org/asset-library/asset/2722</link>
 	</tutorials>
 	<methods>
 		<method name="agent_create">
diff --git a/doc/classes/NavigationServer3D.xml b/doc/classes/NavigationServer3D.xml
index 29d9f5424a..6be9d5165f 100644
--- a/doc/classes/NavigationServer3D.xml
+++ b/doc/classes/NavigationServer3D.xml
@@ -14,8 +14,8 @@
 		This server keeps tracks of any call and executes them during the sync phase. This means that you can request any change to the map, using any thread, without worrying.
 	</description>
 	<tutorials>
-		<link title="3D Navmesh Demo">https://godotengine.org/asset-library/asset/124</link>
 		<link title="Using NavigationServer">$DOCS_URL/tutorials/navigation/navigation_using_navigationservers.html</link>
+		<link title="3D Navigation Demo">https://godotengine.org/asset-library/asset/2743</link>
 	</tutorials>
 	<methods>
 		<method name="agent_create">
diff --git a/doc/classes/Node.xml b/doc/classes/Node.xml
index ae6cd9596c..aea4082dbe 100644
--- a/doc/classes/Node.xml
+++ b/doc/classes/Node.xml
@@ -383,7 +383,7 @@
 				Fetches a node. The [NodePath] can either be a relative path (from this node), or an absolute path (from the [member SceneTree.root]) to a node. If [param path] does not point to a valid node, generates an error and returns [code]null[/code]. Attempts to access methods on the return value will result in an [i]"Attempt to call &lt;method&gt; on a null instance."[/i] error.
 				[b]Note:[/b] Fetching by absolute path only works when the node is inside the scene tree (see [method is_inside_tree]).
 				[b]Example:[/b] Assume this method is called from the Character node, inside the following tree:
-				[codeblock]
+				[codeblock lang=text]
 				 ┖╴root
 				    ┠╴Character (you are here!)
 				    ┃  ┠╴Sword
@@ -514,8 +514,8 @@
 			<return type="String" />
 			<description>
 				Returns the tree as a [String]. Used mainly for debugging purposes. This version displays the path relative to the current node, and is good for copy/pasting into the [method get_node] function. It also can be used in game UI/UX.
-				[b]Example output:[/b]
-				[codeblock]
+				May print, for example:
+				[codeblock lang=text]
 				TheGame
 				TheGame/Menu
 				TheGame/Menu/Label
@@ -529,8 +529,8 @@
 			<return type="String" />
 			<description>
 				Similar to [method get_tree_string], this returns the tree as a [String]. This version displays a more graphical representation similar to what is displayed in the Scene Dock. It is useful for inspecting larger trees.
-				[b]Example output:[/b]
-				[codeblock]
+				May print, for example:
+				[codeblock lang=text]
 				 ┖╴TheGame
 				    ┠╴Menu
 				    ┃  ┠╴Label
@@ -716,8 +716,8 @@
 			<return type="void" />
 			<description>
 				Prints the node and its children to the console, recursively. The node does not have to be inside the tree. This method outputs [NodePath]s relative to this node, and is good for copy/pasting into [method get_node]. See also [method print_tree_pretty].
-				[b]Example output:[/b]
-				[codeblock]
+				May print, for example:
+				[codeblock lang=text]
 				.
 				Menu
 				Menu/Label
@@ -731,8 +731,8 @@
 			<return type="void" />
 			<description>
 				Prints the node and its children to the console, recursively. The node does not have to be inside the tree. Similar to [method print_tree], but the graphical representation looks like what is displayed in the editor's Scene dock. It is useful for inspecting larger trees.
-				[b]Example output:[/b]
-				[codeblock]
+				May print, for example:
+				[codeblock lang=text]
 				 ┖╴TheGame
 				    ┠╴Menu
 				    ┃  ┠╴Label
@@ -795,9 +795,8 @@
 			<return type="void" />
 			<param index="0" name="node" type="Node" />
 			<param index="1" name="keep_groups" type="bool" default="false" />
-			<param index="2" name="keep_children" type="bool" default="true" />
 			<description>
-				Replaces this node by the given [param node]. If [param keep_children] is [code]true[/code] all children of this node are moved to [param node].
+				Replaces this node by the given [param node]. All children of this node are moved to [param node].
 				If [param keep_groups] is [code]true[/code], the [param node] is added to the same groups that the replaced node is in (see [method add_to_group]).
 				[b]Warning:[/b] The replaced node is removed from the tree, but it is [b]not[/b] deleted. To prevent memory leaks, store a reference to the node in a variable, or use [method Object.free].
 			</description>
diff --git a/doc/classes/NodePath.xml b/doc/classes/NodePath.xml
index 6e0799e796..f294b64576 100644
--- a/doc/classes/NodePath.xml
+++ b/doc/classes/NodePath.xml
@@ -34,7 +34,7 @@
 		[b]Note:[/b] In a boolean context, a [NodePath] will evaluate to [code]false[/code] if it is empty ([code]NodePath("")[/code]). Otherwise, a [NodePath] will always evaluate to [code]true[/code].
 	</description>
 	<tutorials>
-		<link title="2D Role Playing Game Demo">https://godotengine.org/asset-library/asset/520</link>
+		<link title="2D Role Playing Game (RPG) Demo">https://godotengine.org/asset-library/asset/2729</link>
 	</tutorials>
 	<constructors>
 		<constructor name="NodePath">
diff --git a/doc/classes/OS.xml b/doc/classes/OS.xml
index bd1bd9afa7..de39901133 100644
--- a/doc/classes/OS.xml
+++ b/doc/classes/OS.xml
@@ -8,7 +8,7 @@
 		[b]Note:[/b] In Godot 4, [OS] functions related to window management, clipboard, and TTS were moved to the [DisplayServer] singleton (and the [Window] class). Functions related to time were removed and are only available in the [Time] class.
 	</description>
 	<tutorials>
-		<link title="OS Test Demo">https://godotengine.org/asset-library/asset/677</link>
+		<link title="Operating System Testing Demo">https://godotengine.org/asset-library/asset/2789</link>
 	</tutorials>
 	<methods>
 		<method name="alert">
diff --git a/doc/classes/Object.xml b/doc/classes/Object.xml
index b69326b6e0..c508591093 100644
--- a/doc/classes/Object.xml
+++ b/doc/classes/Object.xml
@@ -556,7 +556,7 @@
 				While all options have the same outcome ([code]button[/code]'s [signal BaseButton.button_down] signal will be connected to [code]_on_button_down[/code]), [b]option 3[/b] offers the best validation: it will print a compile-time error if either the [code]button_down[/code] [Signal] or the [code]_on_button_down[/code] [Callable] are not defined. On the other hand, [b]option 2[/b] only relies on string names and will only be able to validate either names at runtime: it will print a runtime error if [code]"button_down"[/code] doesn't correspond to a signal, or if [code]"_on_button_down"[/code] is not a registered method in the object [code]self[/code]. The main reason for using options 1, 2, or 4 would be if you actually need to use strings (e.g. to connect signals programmatically based on strings read from a configuration file). Otherwise, option 3 is the recommended (and fastest) method.
 				[b]Binding and passing parameters:[/b]
 				The syntax to bind parameters is through [method Callable.bind], which returns a copy of the [Callable] with its parameters bound.
-				When calling [method emit_signal], the signal parameters can be also passed. The examples below show the relationship between these signal parameters and bound parameters.
+				When calling [method emit_signal] or [method Signal.emit], the signal parameters can be also passed. The examples below show the relationship between these signal parameters and bound parameters.
 				[codeblocks]
 				[gdscript]
 				func _ready():
@@ -566,7 +566,7 @@
 				    player.hit.connect(_on_player_hit.bind("sword", 100))
 
 				    # Parameters added when emitting the signal are passed first.
-				    player.emit_signal("hit", "Dark lord", 5)
+				    player.hit.emit("Dark lord", 5)
 
 				# We pass two arguments when emitting (`hit_by`, `level`),
 				# and bind two more arguments when connecting (`weapon_type`, `damage`).
diff --git a/doc/classes/OccluderPolygon2D.xml b/doc/classes/OccluderPolygon2D.xml
index a78375ad01..36e1540b39 100644
--- a/doc/classes/OccluderPolygon2D.xml
+++ b/doc/classes/OccluderPolygon2D.xml
@@ -17,7 +17,6 @@
 		</member>
 		<member name="polygon" type="PackedVector2Array" setter="set_polygon" getter="get_polygon" default="PackedVector2Array()">
 			A [Vector2] array with the index for polygon's vertices positions.
-			[b]Note:[/b] The returned value is a copy of the underlying array, rather than a reference.
 		</member>
 	</members>
 	<constants>
diff --git a/doc/classes/PackedByteArray.xml b/doc/classes/PackedByteArray.xml
index 0c1532f61a..e179c111a2 100644
--- a/doc/classes/PackedByteArray.xml
+++ b/doc/classes/PackedByteArray.xml
@@ -6,6 +6,7 @@
 	<description>
 		An array specifically designed to hold bytes. Packs data tightly, so it saves memory for large array sizes.
 		[PackedByteArray] also provides methods to encode/decode various types to/from bytes. The way values are encoded is an implementation detail and shouldn't be relied upon when interacting with external apps.
+		[b]Note:[/b] Packed arrays are always passed by reference. To get a copy of an array that can be modified independently of the original array, use [method duplicate]. This is [i]not[/i] the case for built-in properties and methods. The returned packed array of these are a copies, and changing it will [i]not[/i] affect the original value. To update a built-in property you need to modify the returned array, and then assign it to the property again.
 	</description>
 	<tutorials>
 	</tutorials>
diff --git a/doc/classes/PackedColorArray.xml b/doc/classes/PackedColorArray.xml
index ad96ba2490..57295cb1e3 100644
--- a/doc/classes/PackedColorArray.xml
+++ b/doc/classes/PackedColorArray.xml
@@ -5,6 +5,8 @@
 	</brief_description>
 	<description>
 		An array specifically designed to hold [Color]. Packs data tightly, so it saves memory for large array sizes.
+		[b]Differences between packed arrays, typed arrays, and untyped arrays:[/b] Packed arrays are generally faster to iterate on and modify compared to a typed array of the same type (e.g. [PackedColorArray] versus [code]Array[Color][/code]). Also, packed arrays consume less memory. As a downside, packed arrays are less flexible as they don't offer as many convenience methods such as [method Array.map]. Typed arrays are in turn faster to iterate on and modify than untyped arrays.
+		[b]Note:[/b] Packed arrays are always passed by reference. To get a copy of an array that can be modified independently of the original array, use [method duplicate]. This is [i]not[/i] the case for built-in properties and methods. The returned packed array of these are a copies, and changing it will [i]not[/i] affect the original value. To update a built-in property you need to modify the returned array, and then assign it to the property again.
 	</description>
 	<tutorials>
 	</tutorials>
diff --git a/doc/classes/PackedFloat32Array.xml b/doc/classes/PackedFloat32Array.xml
index 6f1ecacca4..2db1386fd0 100644
--- a/doc/classes/PackedFloat32Array.xml
+++ b/doc/classes/PackedFloat32Array.xml
@@ -6,6 +6,7 @@
 	<description>
 		An array specifically designed to hold 32-bit floating-point values (float). Packs data tightly, so it saves memory for large array sizes.
 		If you need to pack 64-bit floats tightly, see [PackedFloat64Array].
+		[b]Note:[/b] Packed arrays are always passed by reference. To get a copy of an array that can be modified independently of the original array, use [method duplicate]. This is [i]not[/i] the case for built-in properties and methods. The returned packed array of these are a copies, and changing it will [i]not[/i] affect the original value. To update a built-in property you need to modify the returned array, and then assign it to the property again.
 	</description>
 	<tutorials>
 	</tutorials>
diff --git a/doc/classes/PackedFloat64Array.xml b/doc/classes/PackedFloat64Array.xml
index aef5ab90ac..0bcee918ed 100644
--- a/doc/classes/PackedFloat64Array.xml
+++ b/doc/classes/PackedFloat64Array.xml
@@ -6,6 +6,8 @@
 	<description>
 		An array specifically designed to hold 64-bit floating-point values (double). Packs data tightly, so it saves memory for large array sizes.
 		If you only need to pack 32-bit floats tightly, see [PackedFloat32Array] for a more memory-friendly alternative.
+		[b]Differences between packed arrays, typed arrays, and untyped arrays:[/b] Packed arrays are generally faster to iterate on and modify compared to a typed array of the same type (e.g. [PackedFloat64Array] versus [code]Array[float][/code]). Also, packed arrays consume less memory. As a downside, packed arrays are less flexible as they don't offer as many convenience methods such as [method Array.map]. Typed arrays are in turn faster to iterate on and modify than untyped arrays.
+		[b]Note:[/b] Packed arrays are always passed by reference. To get a copy of an array that can be modified independently of the original array, use [method duplicate]. This is [i]not[/i] the case for built-in properties and methods. The returned packed array of these are a copies, and changing it will [i]not[/i] affect the original value. To update a built-in property you need to modify the returned array, and then assign it to the property again.
 	</description>
 	<tutorials>
 	</tutorials>
diff --git a/doc/classes/PackedInt32Array.xml b/doc/classes/PackedInt32Array.xml
index e6396e2a93..93b2ae7394 100644
--- a/doc/classes/PackedInt32Array.xml
+++ b/doc/classes/PackedInt32Array.xml
@@ -6,6 +6,7 @@
 	<description>
 		An array specifically designed to hold 32-bit integer values. Packs data tightly, so it saves memory for large array sizes.
 		[b]Note:[/b] This type stores signed 32-bit integers, which means it can take values in the interval [code][-2^31, 2^31 - 1][/code], i.e. [code][-2147483648, 2147483647][/code]. Exceeding those bounds will wrap around. In comparison, [int] uses signed 64-bit integers which can hold much larger values. If you need to pack 64-bit integers tightly, see [PackedInt64Array].
+		[b]Note:[/b] Packed arrays are always passed by reference. To get a copy of an array that can be modified independently of the original array, use [method duplicate]. This is [i]not[/i] the case for built-in properties and methods. The returned packed array of these are a copies, and changing it will [i]not[/i] affect the original value. To update a built-in property you need to modify the returned array, and then assign it to the property again.
 	</description>
 	<tutorials>
 	</tutorials>
diff --git a/doc/classes/PackedInt64Array.xml b/doc/classes/PackedInt64Array.xml
index 55024341c1..3d34165915 100644
--- a/doc/classes/PackedInt64Array.xml
+++ b/doc/classes/PackedInt64Array.xml
@@ -6,6 +6,8 @@
 	<description>
 		An array specifically designed to hold 64-bit integer values. Packs data tightly, so it saves memory for large array sizes.
 		[b]Note:[/b] This type stores signed 64-bit integers, which means it can take values in the interval [code][-2^63, 2^63 - 1][/code], i.e. [code][-9223372036854775808, 9223372036854775807][/code]. Exceeding those bounds will wrap around. If you only need to pack 32-bit integers tightly, see [PackedInt32Array] for a more memory-friendly alternative.
+		[b]Differences between packed arrays, typed arrays, and untyped arrays:[/b] Packed arrays are generally faster to iterate on and modify compared to a typed array of the same type (e.g. [PackedInt32Array] versus [code]Array[int][/code]). Also, packed arrays consume less memory. As a downside, packed arrays are less flexible as they don't offer as many convenience methods such as [method Array.map]. Typed arrays are in turn faster to iterate on and modify than untyped arrays.
+		[b]Note:[/b] Packed arrays are always passed by reference. To get a copy of an array that can be modified independently of the original array, use [method duplicate]. This is [i]not[/i] the case for built-in properties and methods. The returned packed array of these are a copies, and changing it will [i]not[/i] affect the original value. To update a built-in property you need to modify the returned array, and then assign it to the property again.
 	</description>
 	<tutorials>
 	</tutorials>
diff --git a/doc/classes/PackedScene.xml b/doc/classes/PackedScene.xml
index 9324f99535..579b4c5b9f 100644
--- a/doc/classes/PackedScene.xml
+++ b/doc/classes/PackedScene.xml
@@ -73,7 +73,7 @@
 		[/codeblocks]
 	</description>
 	<tutorials>
-		<link title="2D Role Playing Game Demo">https://godotengine.org/asset-library/asset/520</link>
+		<link title="2D Role Playing Game (RPG) Demo">https://godotengine.org/asset-library/asset/2729</link>
 	</tutorials>
 	<methods>
 		<method name="can_instantiate" qualifiers="const">
diff --git a/doc/classes/PackedStringArray.xml b/doc/classes/PackedStringArray.xml
index f1b02272f3..621831c7a3 100644
--- a/doc/classes/PackedStringArray.xml
+++ b/doc/classes/PackedStringArray.xml
@@ -11,9 +11,11 @@
 		var string = " ".join(string_array)
 		print(string) # "hello world"
 		[/codeblock]
+		[b]Differences between packed arrays, typed arrays, and untyped arrays:[/b] Packed arrays are generally faster to iterate on and modify compared to a typed array of the same type (e.g. [PackedStringArray] versus [code]Array[String][/code]). Also, packed arrays consume less memory. As a downside, packed arrays are less flexible as they don't offer as many convenience methods such as [method Array.map]. Typed arrays are in turn faster to iterate on and modify than untyped arrays.
+		[b]Note:[/b] Packed arrays are always passed by reference. To get a copy of an array that can be modified independently of the original array, use [method duplicate]. This is [i]not[/i] the case for built-in properties and methods. The returned packed array of these are a copies, and changing it will [i]not[/i] affect the original value. To update a built-in property you need to modify the returned array, and then assign it to the property again.
 	</description>
 	<tutorials>
-		<link title="OS Test Demo">https://godotengine.org/asset-library/asset/677</link>
+		<link title="Operating System Testing Demo">https://godotengine.org/asset-library/asset/2789</link>
 	</tutorials>
 	<constructors>
 		<constructor name="PackedStringArray">
diff --git a/doc/classes/PackedVector2Array.xml b/doc/classes/PackedVector2Array.xml
index c73fea9114..14a3816353 100644
--- a/doc/classes/PackedVector2Array.xml
+++ b/doc/classes/PackedVector2Array.xml
@@ -5,9 +5,11 @@
 	</brief_description>
 	<description>
 		An array specifically designed to hold [Vector2]. Packs data tightly, so it saves memory for large array sizes.
+		[b]Differences between packed arrays, typed arrays, and untyped arrays:[/b] Packed arrays are generally faster to iterate on and modify compared to a typed array of the same type (e.g. [PackedVector3Array] versus [code]Array[Vector2][/code]). Also, packed arrays consume less memory. As a downside, packed arrays are less flexible as they don't offer as many convenience methods such as [method Array.map]. Typed arrays are in turn faster to iterate on and modify than untyped arrays.
+		[b]Note:[/b] Packed arrays are always passed by reference. To get a copy of an array that can be modified independently of the original array, use [method duplicate]. This is [i]not[/i] the case for built-in properties and methods. The returned packed array of these are a copies, and changing it will [i]not[/i] affect the original value. To update a built-in property you need to modify the returned array, and then assign it to the property again.
 	</description>
 	<tutorials>
-		<link title="2D Navigation Astar Demo">https://godotengine.org/asset-library/asset/519</link>
+		<link title="Grid-based Navigation with AStarGrid2D Demo">https://godotengine.org/asset-library/asset/2723</link>
 	</tutorials>
 	<constructors>
 		<constructor name="PackedVector2Array">
diff --git a/doc/classes/PackedVector3Array.xml b/doc/classes/PackedVector3Array.xml
index 89f258eaea..49220c6fd6 100644
--- a/doc/classes/PackedVector3Array.xml
+++ b/doc/classes/PackedVector3Array.xml
@@ -5,6 +5,8 @@
 	</brief_description>
 	<description>
 		An array specifically designed to hold [Vector3]. Packs data tightly, so it saves memory for large array sizes.
+		[b]Differences between packed arrays, typed arrays, and untyped arrays:[/b] Packed arrays are generally faster to iterate on and modify compared to a typed array of the same type (e.g. [PackedVector3Array] versus [code]Array[Vector3][/code]). Also, packed arrays consume less memory. As a downside, packed arrays are less flexible as they don't offer as many convenience methods such as [method Array.map]. Typed arrays are in turn faster to iterate on and modify than untyped arrays.
+		[b]Note:[/b] Packed arrays are always passed by reference. To get a copy of an array that can be modified independently of the original array, use [method duplicate]. This is [i]not[/i] the case for built-in properties and methods. The returned packed array of these are a copies, and changing it will [i]not[/i] affect the original value. To update a built-in property you need to modify the returned array, and then assign it to the property again.
 	</description>
 	<tutorials>
 	</tutorials>
diff --git a/doc/classes/Panel.xml b/doc/classes/Panel.xml
index db1fa1ccda..993c383c70 100644
--- a/doc/classes/Panel.xml
+++ b/doc/classes/Panel.xml
@@ -7,9 +7,8 @@
 		[Panel] is a GUI control that displays a [StyleBox]. See also [PanelContainer].
 	</description>
 	<tutorials>
-		<link title="2D Role Playing Game Demo">https://godotengine.org/asset-library/asset/520</link>
-		<link title="2D Finite State Machine Demo">https://godotengine.org/asset-library/asset/516</link>
-		<link title="3D Inverse Kinematics Demo">https://godotengine.org/asset-library/asset/523</link>
+		<link title="2D Role Playing Game (RPG) Demo">https://godotengine.org/asset-library/asset/2729</link>
+		<link title="Hierarchical Finite State Machine Demo">https://godotengine.org/asset-library/asset/2714</link>
 	</tutorials>
 	<theme_items>
 		<theme_item name="panel" data_type="style" type="StyleBox">
diff --git a/doc/classes/PanelContainer.xml b/doc/classes/PanelContainer.xml
index c6b3604cc4..c2972abc14 100644
--- a/doc/classes/PanelContainer.xml
+++ b/doc/classes/PanelContainer.xml
@@ -8,7 +8,7 @@
 	</description>
 	<tutorials>
 		<link title="Using Containers">$DOCS_URL/tutorials/ui/gui_containers.html</link>
-		<link title="2D Role Playing Game Demo">https://godotengine.org/asset-library/asset/520</link>
+		<link title="2D Role Playing Game (RPG) Demo">https://godotengine.org/asset-library/asset/2729</link>
 	</tutorials>
 	<members>
 		<member name="mouse_filter" type="int" setter="set_mouse_filter" getter="get_mouse_filter" overrides="Control" enum="Control.MouseFilter" default="0" />
diff --git a/doc/classes/PhysicalBone3D.xml b/doc/classes/PhysicalBone3D.xml
index c3b202e0a5..bce1a80526 100644
--- a/doc/classes/PhysicalBone3D.xml
+++ b/doc/classes/PhysicalBone3D.xml
@@ -13,7 +13,7 @@
 			<return type="void" />
 			<param index="0" name="state" type="PhysicsDirectBodyState3D" />
 			<description>
-				Called during physics processing, allowing you to read and safely modify the simulation state for the object. By default, it works in addition to the usual physics behavior, but the [member custom_integrator] property allows you to disable the default behavior and do fully custom force integration for a body.
+				Called during physics processing, allowing you to read and safely modify the simulation state for the object. By default, it is called before the standard force integration, but the [member custom_integrator] property allows you to disable the standard force integration and do fully custom force integration for a body.
 			</description>
 		</method>
 		<method name="apply_central_impulse">
@@ -67,7 +67,8 @@
 			If [code]true[/code], the body is deactivated when there is no movement, so it will not take part in the simulation until it is awakened by an external force.
 		</member>
 		<member name="custom_integrator" type="bool" setter="set_use_custom_integrator" getter="is_using_custom_integrator" default="false">
-			If [code]true[/code], internal force integration will be disabled (like gravity or air friction) for this body. Other than collision response, the body will only move as determined by the [method _integrate_forces] function, if defined.
+			If [code]true[/code], the standard force integration (like gravity or damping) will be disabled for this body. Other than collision response, the body will only move as determined by the [method _integrate_forces] method, if that virtual method is overridden.
+			Setting this property will call the method [method PhysicsServer3D.body_set_omit_force_integration] internally.
 		</member>
 		<member name="friction" type="float" setter="set_friction" getter="get_friction" default="1.0">
 			The body's friction, from [code]0[/code] (frictionless) to [code]1[/code] (max friction).
diff --git a/doc/classes/PhysicsDirectBodyState2D.xml b/doc/classes/PhysicsDirectBodyState2D.xml
index 7051663a78..d60cc1ee6b 100644
--- a/doc/classes/PhysicsDirectBodyState2D.xml
+++ b/doc/classes/PhysicsDirectBodyState2D.xml
@@ -202,7 +202,7 @@
 		<method name="integrate_forces">
 			<return type="void" />
 			<description>
-				Calls the built-in force integration code.
+				Updates the body's linear and angular velocity by applying gravity and damping for the equivalent of one physics tick.
 			</description>
 		</method>
 		<method name="set_constant_force">
diff --git a/doc/classes/PhysicsDirectBodyState2DExtension.xml b/doc/classes/PhysicsDirectBodyState2DExtension.xml
index 04612b461e..932c1c8352 100644
--- a/doc/classes/PhysicsDirectBodyState2DExtension.xml
+++ b/doc/classes/PhysicsDirectBodyState2DExtension.xml
@@ -14,6 +14,7 @@
 			<return type="void" />
 			<param index="0" name="force" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.add_constant_central_force].
 			</description>
 		</method>
 		<method name="_add_constant_force" qualifiers="virtual">
@@ -21,24 +22,28 @@
 			<param index="0" name="force" type="Vector2" />
 			<param index="1" name="position" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.add_constant_force].
 			</description>
 		</method>
 		<method name="_add_constant_torque" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="torque" type="float" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.add_constant_torque].
 			</description>
 		</method>
 		<method name="_apply_central_force" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="force" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.apply_central_force].
 			</description>
 		</method>
 		<method name="_apply_central_impulse" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="impulse" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.apply_central_impulse].
 			</description>
 		</method>
 		<method name="_apply_force" qualifiers="virtual">
@@ -46,6 +51,7 @@
 			<param index="0" name="force" type="Vector2" />
 			<param index="1" name="position" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.apply_force].
 			</description>
 		</method>
 		<method name="_apply_impulse" qualifiers="virtual">
@@ -53,211 +59,249 @@
 			<param index="0" name="impulse" type="Vector2" />
 			<param index="1" name="position" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.apply_impulse].
 			</description>
 		</method>
 		<method name="_apply_torque" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="torque" type="float" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.apply_torque].
 			</description>
 		</method>
 		<method name="_apply_torque_impulse" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="impulse" type="float" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.apply_torque_impulse].
 			</description>
 		</method>
 		<method name="_get_angular_velocity" qualifiers="virtual const">
 			<return type="float" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.angular_velocity] and its respective getter.
 			</description>
 		</method>
 		<method name="_get_center_of_mass" qualifiers="virtual const">
 			<return type="Vector2" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.center_of_mass] and its respective getter.
 			</description>
 		</method>
 		<method name="_get_center_of_mass_local" qualifiers="virtual const">
 			<return type="Vector2" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.center_of_mass_local] and its respective getter.
 			</description>
 		</method>
 		<method name="_get_constant_force" qualifiers="virtual const">
 			<return type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_constant_force].
 			</description>
 		</method>
 		<method name="_get_constant_torque" qualifiers="virtual const">
 			<return type="float" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_constant_torque].
 			</description>
 		</method>
 		<method name="_get_contact_collider" qualifiers="virtual const">
 			<return type="RID" />
 			<param index="0" name="contact_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_collider].
 			</description>
 		</method>
 		<method name="_get_contact_collider_id" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="contact_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_collider_id].
 			</description>
 		</method>
 		<method name="_get_contact_collider_object" qualifiers="virtual const">
 			<return type="Object" />
 			<param index="0" name="contact_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_collider_object].
 			</description>
 		</method>
 		<method name="_get_contact_collider_position" qualifiers="virtual const">
 			<return type="Vector2" />
 			<param index="0" name="contact_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_collider_position].
 			</description>
 		</method>
 		<method name="_get_contact_collider_shape" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="contact_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_collider_shape].
 			</description>
 		</method>
 		<method name="_get_contact_collider_velocity_at_position" qualifiers="virtual const">
 			<return type="Vector2" />
 			<param index="0" name="contact_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_collider_velocity_at_position].
 			</description>
 		</method>
 		<method name="_get_contact_count" qualifiers="virtual const">
 			<return type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_count].
 			</description>
 		</method>
 		<method name="_get_contact_impulse" qualifiers="virtual const">
 			<return type="Vector2" />
 			<param index="0" name="contact_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_impulse].
 			</description>
 		</method>
 		<method name="_get_contact_local_normal" qualifiers="virtual const">
 			<return type="Vector2" />
 			<param index="0" name="contact_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_local_normal].
 			</description>
 		</method>
 		<method name="_get_contact_local_position" qualifiers="virtual const">
 			<return type="Vector2" />
 			<param index="0" name="contact_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_local_position].
 			</description>
 		</method>
 		<method name="_get_contact_local_shape" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="contact_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_local_shape].
 			</description>
 		</method>
 		<method name="_get_contact_local_velocity_at_position" qualifiers="virtual const">
 			<return type="Vector2" />
 			<param index="0" name="contact_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_contact_local_velocity_at_position].
 			</description>
 		</method>
 		<method name="_get_inverse_inertia" qualifiers="virtual const">
 			<return type="float" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.inverse_inertia] and its respective getter.
 			</description>
 		</method>
 		<method name="_get_inverse_mass" qualifiers="virtual const">
 			<return type="float" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.inverse_mass] and its respective getter.
 			</description>
 		</method>
 		<method name="_get_linear_velocity" qualifiers="virtual const">
 			<return type="Vector2" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.linear_velocity] and its respective getter.
 			</description>
 		</method>
 		<method name="_get_space_state" qualifiers="virtual">
 			<return type="PhysicsDirectSpaceState2D" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_space_state].
 			</description>
 		</method>
 		<method name="_get_step" qualifiers="virtual const">
 			<return type="float" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.step] and its respective getter.
 			</description>
 		</method>
 		<method name="_get_total_angular_damp" qualifiers="virtual const">
 			<return type="float" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.total_angular_damp] and its respective getter.
 			</description>
 		</method>
 		<method name="_get_total_gravity" qualifiers="virtual const">
 			<return type="Vector2" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.total_gravity] and its respective getter.
 			</description>
 		</method>
 		<method name="_get_total_linear_damp" qualifiers="virtual const">
 			<return type="float" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.total_linear_damp] and its respective getter.
 			</description>
 		</method>
 		<method name="_get_transform" qualifiers="virtual const">
 			<return type="Transform2D" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.transform] and its respective getter.
 			</description>
 		</method>
 		<method name="_get_velocity_at_local_position" qualifiers="virtual const">
 			<return type="Vector2" />
 			<param index="0" name="local_position" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.get_velocity_at_local_position].
 			</description>
 		</method>
 		<method name="_integrate_forces" qualifiers="virtual">
 			<return type="void" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.integrate_forces].
 			</description>
 		</method>
 		<method name="_is_sleeping" qualifiers="virtual const">
 			<return type="bool" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.sleeping] and its respective getter.
 			</description>
 		</method>
 		<method name="_set_angular_velocity" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="velocity" type="float" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.angular_velocity] and its respective setter.
 			</description>
 		</method>
 		<method name="_set_constant_force" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="force" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.set_constant_force].
 			</description>
 		</method>
 		<method name="_set_constant_torque" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="torque" type="float" />
 			<description>
+				Overridable version of [method PhysicsDirectBodyState2D.set_constant_torque].
 			</description>
 		</method>
 		<method name="_set_linear_velocity" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="velocity" type="Vector2" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.linear_velocity] and its respective setter.
 			</description>
 		</method>
 		<method name="_set_sleep_state" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="enabled" type="bool" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.sleeping] and its respective setter.
 			</description>
 		</method>
 		<method name="_set_transform" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="transform" type="Transform2D" />
 			<description>
+				Implement to override the behavior of [member PhysicsDirectBodyState2D.transform] and its respective setter.
 			</description>
 		</method>
 	</methods>
diff --git a/doc/classes/PhysicsDirectBodyState3D.xml b/doc/classes/PhysicsDirectBodyState3D.xml
index 42c65763aa..e8c3f3f89d 100644
--- a/doc/classes/PhysicsDirectBodyState3D.xml
+++ b/doc/classes/PhysicsDirectBodyState3D.xml
@@ -202,7 +202,7 @@
 		<method name="integrate_forces">
 			<return type="void" />
 			<description>
-				Calls the built-in force integration code.
+				Updates the body's linear and angular velocity by applying gravity and damping for the equivalent of one physics tick.
 			</description>
 		</method>
 		<method name="set_constant_force">
diff --git a/doc/classes/PhysicsServer2D.xml b/doc/classes/PhysicsServer2D.xml
index 8be92edbad..d40326fa21 100644
--- a/doc/classes/PhysicsServer2D.xml
+++ b/doc/classes/PhysicsServer2D.xml
@@ -501,7 +501,7 @@
 			<return type="bool" />
 			<param index="0" name="body" type="RID" />
 			<description>
-				Returns [code]true[/code] if the body uses a callback function to calculate its own physics (see [method body_set_force_integration_callback]).
+				Returns [code]true[/code] if the body is omitting the standard force integration. See [method body_set_omit_force_integration].
 			</description>
 		</method>
 		<method name="body_remove_collision_exception">
@@ -592,11 +592,12 @@
 			<param index="1" name="callable" type="Callable" />
 			<param index="2" name="userdata" type="Variant" default="null" />
 			<description>
-				Sets the function used to calculate physics for the body, if that body allows it (see [method body_set_omit_force_integration]).
-				The force integration function takes the following two parameters:
-				1. a [PhysicsDirectBodyState2D] [code]state[/code]: used to retrieve and modify the body's state,
-				2. a [Variant] [param userdata]: optional user data.
-				[b]Note:[/b] This callback is currently not called in Godot Physics.
+				Sets the body's custom force integration callback function to [param callable]. Use an empty [Callable] ([code skip-lint]Callable()[/code]) to clear the custom callback.
+				The function [param callable] will be called every physics tick, before the standard force integration (see [method body_set_omit_force_integration]). It can be used for example to update the body's linear and angular velocity based on contact with other bodies.
+				If [param userdata] is not [code]null[/code], the function [param callable] must take the following two parameters:
+				1. [code]state[/code]: a [PhysicsDirectBodyState2D] used to retrieve and modify the body's state,
+				2. [code skip-lint]userdata[/code]: a [Variant]; its value will be the [param userdata] passed into this method.
+				If [param userdata] is [code]null[/code], then [param callable] must take only the [code]state[/code] parameter.
 			</description>
 		</method>
 		<method name="body_set_max_contacts_reported">
@@ -620,7 +621,8 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="enable" type="bool" />
 			<description>
-				Sets whether the body uses a callback function to calculate its own physics (see [method body_set_force_integration_callback]).
+				Sets whether the body omits the standard force integration. If [param enable] is [code]true[/code], the body will not automatically use applied forces, torques, and damping to update the body's linear and angular velocity. In this case, [method body_set_force_integration_callback] can be used to manually update the linear and angular velocity instead.
+				This method is called when the property [member RigidBody2D.custom_integrator] is set.
 			</description>
 		</method>
 		<method name="body_set_param">
diff --git a/doc/classes/PhysicsServer2DExtension.xml b/doc/classes/PhysicsServer2DExtension.xml
index 8d9a171337..815fc742d1 100644
--- a/doc/classes/PhysicsServer2DExtension.xml
+++ b/doc/classes/PhysicsServer2DExtension.xml
@@ -17,6 +17,7 @@
 			<param index="2" name="transform" type="Transform2D" />
 			<param index="3" name="disabled" type="bool" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_add_shape].
 			</description>
 		</method>
 		<method name="_area_attach_canvas_instance_id" qualifiers="virtual">
@@ -24,6 +25,7 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="id" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_attach_canvas_instance_id].
 			</description>
 		</method>
 		<method name="_area_attach_object_instance_id" qualifiers="virtual">
@@ -31,41 +33,48 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="id" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_attach_object_instance_id].
 			</description>
 		</method>
 		<method name="_area_clear_shapes" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="area" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_clear_shapes].
 			</description>
 		</method>
 		<method name="_area_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_create].
 			</description>
 		</method>
 		<method name="_area_get_canvas_instance_id" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="area" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_get_canvas_instance_id].
 			</description>
 		</method>
 		<method name="_area_get_collision_layer" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="area" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_get_collision_layer].
 			</description>
 		</method>
 		<method name="_area_get_collision_mask" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="area" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_get_collision_mask].
 			</description>
 		</method>
 		<method name="_area_get_object_instance_id" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="area" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_get_object_instance_id].
 			</description>
 		</method>
 		<method name="_area_get_param" qualifiers="virtual const">
@@ -73,6 +82,7 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.AreaParameter" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_get_param].
 			</description>
 		</method>
 		<method name="_area_get_shape" qualifiers="virtual const">
@@ -80,12 +90,14 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="shape_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_get_shape].
 			</description>
 		</method>
 		<method name="_area_get_shape_count" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="area" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_get_shape_count].
 			</description>
 		</method>
 		<method name="_area_get_shape_transform" qualifiers="virtual const">
@@ -93,18 +105,21 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="shape_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_get_shape_transform].
 			</description>
 		</method>
 		<method name="_area_get_space" qualifiers="virtual const">
 			<return type="RID" />
 			<param index="0" name="area" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_get_space].
 			</description>
 		</method>
 		<method name="_area_get_transform" qualifiers="virtual const">
 			<return type="Transform2D" />
 			<param index="0" name="area" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_get_transform].
 			</description>
 		</method>
 		<method name="_area_remove_shape" qualifiers="virtual">
@@ -112,6 +127,7 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="shape_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_remove_shape].
 			</description>
 		</method>
 		<method name="_area_set_area_monitor_callback" qualifiers="virtual">
@@ -119,6 +135,7 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="callback" type="Callable" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_set_area_monitor_callback].
 			</description>
 		</method>
 		<method name="_area_set_collision_layer" qualifiers="virtual">
@@ -126,6 +143,7 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="layer" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_set_collision_layer].
 			</description>
 		</method>
 		<method name="_area_set_collision_mask" qualifiers="virtual">
@@ -133,6 +151,7 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="mask" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_set_collision_mask].
 			</description>
 		</method>
 		<method name="_area_set_monitor_callback" qualifiers="virtual">
@@ -140,6 +159,7 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="callback" type="Callable" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_set_monitor_callback].
 			</description>
 		</method>
 		<method name="_area_set_monitorable" qualifiers="virtual">
@@ -147,6 +167,7 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="monitorable" type="bool" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_set_monitorable].
 			</description>
 		</method>
 		<method name="_area_set_param" qualifiers="virtual">
@@ -155,6 +176,7 @@
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.AreaParameter" />
 			<param index="2" name="value" type="Variant" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_set_param].
 			</description>
 		</method>
 		<method name="_area_set_pickable" qualifiers="virtual">
@@ -162,6 +184,8 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="pickable" type="bool" />
 			<description>
+				If set to [code]true[/code], allows the area with the given [RID] to detect mouse inputs when the mouse cursor is hovering on it.
+				Overridable version of [PhysicsServer2D]'s internal [code]area_set_pickable[/code] method. Corresponds to [member PhysicsBody2D.input_pickable].
 			</description>
 		</method>
 		<method name="_area_set_shape" qualifiers="virtual">
@@ -170,6 +194,7 @@
 			<param index="1" name="shape_idx" type="int" />
 			<param index="2" name="shape" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_set_shape].
 			</description>
 		</method>
 		<method name="_area_set_shape_disabled" qualifiers="virtual">
@@ -178,6 +203,7 @@
 			<param index="1" name="shape_idx" type="int" />
 			<param index="2" name="disabled" type="bool" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_set_shape_disabled].
 			</description>
 		</method>
 		<method name="_area_set_shape_transform" qualifiers="virtual">
@@ -186,6 +212,7 @@
 			<param index="1" name="shape_idx" type="int" />
 			<param index="2" name="transform" type="Transform2D" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_set_shape_transform].
 			</description>
 		</method>
 		<method name="_area_set_space" qualifiers="virtual">
@@ -193,6 +220,7 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="space" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_set_space].
 			</description>
 		</method>
 		<method name="_area_set_transform" qualifiers="virtual">
@@ -200,6 +228,7 @@
 			<param index="0" name="area" type="RID" />
 			<param index="1" name="transform" type="Transform2D" />
 			<description>
+				Overridable version of [method PhysicsServer2D.area_set_transform].
 			</description>
 		</method>
 		<method name="_body_add_collision_exception" qualifiers="virtual">
@@ -207,6 +236,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="excepted_body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_add_collision_exception].
 			</description>
 		</method>
 		<method name="_body_add_constant_central_force" qualifiers="virtual">
@@ -214,6 +244,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="force" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_add_constant_central_force].
 			</description>
 		</method>
 		<method name="_body_add_constant_force" qualifiers="virtual">
@@ -222,6 +253,7 @@
 			<param index="1" name="force" type="Vector2" />
 			<param index="2" name="position" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_add_constant_force].
 			</description>
 		</method>
 		<method name="_body_add_constant_torque" qualifiers="virtual">
@@ -229,6 +261,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="torque" type="float" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_add_constant_torque].
 			</description>
 		</method>
 		<method name="_body_add_shape" qualifiers="virtual">
@@ -238,6 +271,7 @@
 			<param index="2" name="transform" type="Transform2D" />
 			<param index="3" name="disabled" type="bool" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_add_shape].
 			</description>
 		</method>
 		<method name="_body_apply_central_force" qualifiers="virtual">
@@ -245,6 +279,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="force" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_apply_central_force].
 			</description>
 		</method>
 		<method name="_body_apply_central_impulse" qualifiers="virtual">
@@ -252,6 +287,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="impulse" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_apply_central_impulse].
 			</description>
 		</method>
 		<method name="_body_apply_force" qualifiers="virtual">
@@ -260,6 +296,7 @@
 			<param index="1" name="force" type="Vector2" />
 			<param index="2" name="position" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_apply_force].
 			</description>
 		</method>
 		<method name="_body_apply_impulse" qualifiers="virtual">
@@ -268,6 +305,7 @@
 			<param index="1" name="impulse" type="Vector2" />
 			<param index="2" name="position" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_apply_impulse].
 			</description>
 		</method>
 		<method name="_body_apply_torque" qualifiers="virtual">
@@ -275,6 +313,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="torque" type="float" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_apply_torque].
 			</description>
 		</method>
 		<method name="_body_apply_torque_impulse" qualifiers="virtual">
@@ -282,6 +321,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="impulse" type="float" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_apply_torque_impulse].
 			</description>
 		</method>
 		<method name="_body_attach_canvas_instance_id" qualifiers="virtual">
@@ -289,6 +329,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="id" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_attach_canvas_instance_id].
 			</description>
 		</method>
 		<method name="_body_attach_object_instance_id" qualifiers="virtual">
@@ -296,12 +337,14 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="id" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_attach_object_instance_id].
 			</description>
 		</method>
 		<method name="_body_clear_shapes" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_clear_shapes].
 			</description>
 		</method>
 		<method name="_body_collide_shape" qualifiers="virtual">
@@ -315,89 +358,107 @@
 			<param index="6" name="result_max" type="int" />
 			<param index="7" name="result_count" type="int32_t*" />
 			<description>
+				Given a [param body], a [param shape], and their respective parameters, this method should return [code]true[/code] if a collision between the two would occur, with additional details passed in [param results].
+				Overridable version of [PhysicsServer2D]'s internal [code]shape_collide[/code] method. Corresponds to [method PhysicsDirectSpaceState2D.collide_shape].
 			</description>
 		</method>
 		<method name="_body_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_create].
 			</description>
 		</method>
 		<method name="_body_get_canvas_instance_id" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_canvas_instance_id].
 			</description>
 		</method>
 		<method name="_body_get_collision_exceptions" qualifiers="virtual const">
 			<return type="RID[]" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Returns the [RID]s of all bodies added as collision exceptions for the given [param body]. See also [method _body_add_collision_exception] and [method _body_remove_collision_exception].
+				Overridable version of [PhysicsServer2D]'s internal [code]body_get_collision_exceptions[/code] method. Corresponds to [method PhysicsBody2D.get_collision_exceptions].
 			</description>
 		</method>
 		<method name="_body_get_collision_layer" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_collision_layer].
 			</description>
 		</method>
 		<method name="_body_get_collision_mask" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_collision_mask].
 			</description>
 		</method>
 		<method name="_body_get_collision_priority" qualifiers="virtual const">
 			<return type="float" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_collision_priority].
 			</description>
 		</method>
 		<method name="_body_get_constant_force" qualifiers="virtual const">
 			<return type="Vector2" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_constant_force].
 			</description>
 		</method>
 		<method name="_body_get_constant_torque" qualifiers="virtual const">
 			<return type="float" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_constant_torque].
 			</description>
 		</method>
 		<method name="_body_get_contacts_reported_depth_threshold" qualifiers="virtual const">
 			<return type="float" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [PhysicsServer2D]'s internal [code]body_get_contacts_reported_depth_threshold[/code] method.
+				[b]Note:[/b] This method is currently unused by Godot's default physics implementation.
 			</description>
 		</method>
 		<method name="_body_get_continuous_collision_detection_mode" qualifiers="virtual const">
 			<return type="int" enum="PhysicsServer2D.CCDMode" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_continuous_collision_detection_mode].
 			</description>
 		</method>
 		<method name="_body_get_direct_state" qualifiers="virtual">
 			<return type="PhysicsDirectBodyState2D" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_direct_state].
 			</description>
 		</method>
 		<method name="_body_get_max_contacts_reported" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_max_contacts_reported].
 			</description>
 		</method>
 		<method name="_body_get_mode" qualifiers="virtual const">
 			<return type="int" enum="PhysicsServer2D.BodyMode" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_mode].
 			</description>
 		</method>
 		<method name="_body_get_object_instance_id" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_object_instance_id].
 			</description>
 		</method>
 		<method name="_body_get_param" qualifiers="virtual const">
@@ -405,6 +466,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.BodyParameter" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_param].
 			</description>
 		</method>
 		<method name="_body_get_shape" qualifiers="virtual const">
@@ -412,12 +474,14 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="shape_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_shape].
 			</description>
 		</method>
 		<method name="_body_get_shape_count" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_shape_count].
 			</description>
 		</method>
 		<method name="_body_get_shape_transform" qualifiers="virtual const">
@@ -425,12 +489,14 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="shape_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_shape_transform].
 			</description>
 		</method>
 		<method name="_body_get_space" qualifiers="virtual const">
 			<return type="RID" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_space].
 			</description>
 		</method>
 		<method name="_body_get_state" qualifiers="virtual const">
@@ -438,12 +504,14 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="state" type="int" enum="PhysicsServer2D.BodyState" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_get_state].
 			</description>
 		</method>
 		<method name="_body_is_omitting_force_integration" qualifiers="virtual const">
 			<return type="bool" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_is_omitting_force_integration].
 			</description>
 		</method>
 		<method name="_body_remove_collision_exception" qualifiers="virtual">
@@ -451,6 +519,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="excepted_body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_remove_collision_exception].
 			</description>
 		</method>
 		<method name="_body_remove_shape" qualifiers="virtual">
@@ -458,12 +527,14 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="shape_idx" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_remove_shape].
 			</description>
 		</method>
 		<method name="_body_reset_mass_properties" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_reset_mass_properties].
 			</description>
 		</method>
 		<method name="_body_set_axis_velocity" qualifiers="virtual">
@@ -471,6 +542,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="axis_velocity" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_axis_velocity].
 			</description>
 		</method>
 		<method name="_body_set_collision_layer" qualifiers="virtual">
@@ -478,6 +550,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="layer" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_collision_layer].
 			</description>
 		</method>
 		<method name="_body_set_collision_mask" qualifiers="virtual">
@@ -485,6 +558,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="mask" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_collision_mask].
 			</description>
 		</method>
 		<method name="_body_set_collision_priority" qualifiers="virtual">
@@ -492,6 +566,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="priority" type="float" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_collision_priority].
 			</description>
 		</method>
 		<method name="_body_set_constant_force" qualifiers="virtual">
@@ -499,6 +574,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="force" type="Vector2" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_constant_force].
 			</description>
 		</method>
 		<method name="_body_set_constant_torque" qualifiers="virtual">
@@ -506,6 +582,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="torque" type="float" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_constant_torque].
 			</description>
 		</method>
 		<method name="_body_set_contacts_reported_depth_threshold" qualifiers="virtual">
@@ -513,6 +590,8 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="threshold" type="float" />
 			<description>
+				Overridable version of [PhysicsServer2D]'s internal [code]body_set_contacts_reported_depth_threshold[/code] method.
+				[b]Note:[/b] This method is currently unused by Godot's default physics implementation.
 			</description>
 		</method>
 		<method name="_body_set_continuous_collision_detection_mode" qualifiers="virtual">
@@ -520,6 +599,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="mode" type="int" enum="PhysicsServer2D.CCDMode" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_continuous_collision_detection_mode].
 			</description>
 		</method>
 		<method name="_body_set_force_integration_callback" qualifiers="virtual">
@@ -528,6 +608,7 @@
 			<param index="1" name="callable" type="Callable" />
 			<param index="2" name="userdata" type="Variant" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_force_integration_callback].
 			</description>
 		</method>
 		<method name="_body_set_max_contacts_reported" qualifiers="virtual">
@@ -535,6 +616,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="amount" type="int" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_max_contacts_reported].
 			</description>
 		</method>
 		<method name="_body_set_mode" qualifiers="virtual">
@@ -542,6 +624,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="mode" type="int" enum="PhysicsServer2D.BodyMode" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_mode].
 			</description>
 		</method>
 		<method name="_body_set_omit_force_integration" qualifiers="virtual">
@@ -549,6 +632,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="enable" type="bool" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_omit_force_integration].
 			</description>
 		</method>
 		<method name="_body_set_param" qualifiers="virtual">
@@ -557,6 +641,7 @@
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.BodyParameter" />
 			<param index="2" name="value" type="Variant" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_param].
 			</description>
 		</method>
 		<method name="_body_set_pickable" qualifiers="virtual">
@@ -564,6 +649,8 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="pickable" type="bool" />
 			<description>
+				If set to [code]true[/code], allows the body with the given [RID] to detect mouse inputs when the mouse cursor is hovering on it.
+				Overridable version of [PhysicsServer2D]'s internal [code]body_set_pickable[/code] method. Corresponds to [member PhysicsBody2D.input_pickable].
 			</description>
 		</method>
 		<method name="_body_set_shape" qualifiers="virtual">
@@ -572,6 +659,7 @@
 			<param index="1" name="shape_idx" type="int" />
 			<param index="2" name="shape" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_shape].
 			</description>
 		</method>
 		<method name="_body_set_shape_as_one_way_collision" qualifiers="virtual">
@@ -581,6 +669,7 @@
 			<param index="2" name="enable" type="bool" />
 			<param index="3" name="margin" type="float" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_shape_as_one_way_collision].
 			</description>
 		</method>
 		<method name="_body_set_shape_disabled" qualifiers="virtual">
@@ -589,6 +678,7 @@
 			<param index="1" name="shape_idx" type="int" />
 			<param index="2" name="disabled" type="bool" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_shape_disabled].
 			</description>
 		</method>
 		<method name="_body_set_shape_transform" qualifiers="virtual">
@@ -597,6 +687,7 @@
 			<param index="1" name="shape_idx" type="int" />
 			<param index="2" name="transform" type="Transform2D" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_shape_transform].
 			</description>
 		</method>
 		<method name="_body_set_space" qualifiers="virtual">
@@ -604,6 +695,7 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="space" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_space].
 			</description>
 		</method>
 		<method name="_body_set_state" qualifiers="virtual">
@@ -612,6 +704,7 @@
 			<param index="1" name="state" type="int" enum="PhysicsServer2D.BodyState" />
 			<param index="2" name="value" type="Variant" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_set_state].
 			</description>
 		</method>
 		<method name="_body_set_state_sync_callback" qualifiers="virtual">
@@ -619,6 +712,8 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="callable" type="Callable" />
 			<description>
+				Assigns the [param body] to call the given [param callable] during the synchronization phase of the loop, before [method _step] is called. See also [method _sync].
+				Overridable version of [PhysicsServer2D]'s internal [code]body_set_state_sync_callback[/code] method.
 			</description>
 		</method>
 		<method name="_body_test_motion" qualifiers="virtual const">
@@ -631,26 +726,31 @@
 			<param index="5" name="recovery_as_collision" type="bool" />
 			<param index="6" name="result" type="PhysicsServer2DExtensionMotionResult*" />
 			<description>
+				Overridable version of [method PhysicsServer2D.body_test_motion]. Unlike the exposed implementation, this method does not receive all of the arguments inside a [PhysicsTestMotionParameters2D].
 			</description>
 		</method>
 		<method name="_capsule_shape_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.capsule_shape_create].
 			</description>
 		</method>
 		<method name="_circle_shape_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.circle_shape_create].
 			</description>
 		</method>
 		<method name="_concave_polygon_shape_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.concave_polygon_shape_create].
 			</description>
 		</method>
 		<method name="_convex_polygon_shape_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.convex_polygon_shape_create].
 			</description>
 		</method>
 		<method name="_damped_spring_joint_get_param" qualifiers="virtual const">
@@ -658,6 +758,7 @@
 			<param index="0" name="joint" type="RID" />
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.DampedSpringParam" />
 			<description>
+				Overridable version of [method PhysicsServer2D.damped_spring_joint_get_param].
 			</description>
 		</method>
 		<method name="_damped_spring_joint_set_param" qualifiers="virtual">
@@ -666,54 +767,69 @@
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.DampedSpringParam" />
 			<param index="2" name="value" type="float" />
 			<description>
+				Overridable version of [method PhysicsServer2D.damped_spring_joint_set_param].
 			</description>
 		</method>
 		<method name="_end_sync" qualifiers="virtual">
 			<return type="void" />
 			<description>
+				Called to indicate that the physics server has stopped synchronizing. It is in the loop's iteration/physics phase, and can access physics objects even if running on a separate thread. See also [method _sync].
+				Overridable version of [PhysicsServer2D]'s internal [code]end_sync[/code] method.
 			</description>
 		</method>
 		<method name="_finish" qualifiers="virtual">
 			<return type="void" />
 			<description>
+				Called when the main loop finalizes to shut down the physics server. See also [method MainLoop._finalize] and [method _init].
+				Overridable version of [PhysicsServer2D]'s internal [code]finish[/code] method.
 			</description>
 		</method>
 		<method name="_flush_queries" qualifiers="virtual">
 			<return type="void" />
 			<description>
+				Called every physics step before [method _step] to process all remaining queries.
+				Overridable version of [PhysicsServer2D]'s internal [code]flush_queries[/code] method.
 			</description>
 		</method>
 		<method name="_free_rid" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="rid" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.free_rid].
 			</description>
 		</method>
 		<method name="_get_process_info" qualifiers="virtual">
 			<return type="int" />
 			<param index="0" name="process_info" type="int" enum="PhysicsServer2D.ProcessInfo" />
 			<description>
+				Overridable version of [method PhysicsServer2D.get_process_info].
 			</description>
 		</method>
 		<method name="_init" qualifiers="virtual">
 			<return type="void" />
 			<description>
+				Called when the main loop is initialized and creates a new instance of this physics server. See also [method MainLoop._initialize] and [method _finish].
+				Overridable version of [PhysicsServer2D]'s internal [code]init[/code] method.
 			</description>
 		</method>
 		<method name="_is_flushing_queries" qualifiers="virtual const">
 			<return type="bool" />
 			<description>
+				Overridable method that should return [code]true[/code] when the physics server is processing queries. See also [method _flush_queries].
+				Overridable version of [PhysicsServer2D]'s internal [code]is_flushing_queries[/code] method.
 			</description>
 		</method>
 		<method name="_joint_clear" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="joint" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.joint_clear].
 			</description>
 		</method>
 		<method name="_joint_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.joint_create].
 			</description>
 		</method>
 		<method name="_joint_disable_collisions_between_bodies" qualifiers="virtual">
@@ -721,6 +837,7 @@
 			<param index="0" name="joint" type="RID" />
 			<param index="1" name="disable" type="bool" />
 			<description>
+				Overridable version of [method PhysicsServer2D.joint_disable_collisions_between_bodies].
 			</description>
 		</method>
 		<method name="_joint_get_param" qualifiers="virtual const">
@@ -728,18 +845,21 @@
 			<param index="0" name="joint" type="RID" />
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.JointParam" />
 			<description>
+				Overridable version of [method PhysicsServer2D.joint_get_param].
 			</description>
 		</method>
 		<method name="_joint_get_type" qualifiers="virtual const">
 			<return type="int" enum="PhysicsServer2D.JointType" />
 			<param index="0" name="joint" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.joint_get_type].
 			</description>
 		</method>
 		<method name="_joint_is_disabled_collisions_between_bodies" qualifiers="virtual const">
 			<return type="bool" />
 			<param index="0" name="joint" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.joint_is_disabled_collisions_between_bodies].
 			</description>
 		</method>
 		<method name="_joint_make_damped_spring" qualifiers="virtual">
@@ -750,6 +870,7 @@
 			<param index="3" name="body_a" type="RID" />
 			<param index="4" name="body_b" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.joint_make_damped_spring].
 			</description>
 		</method>
 		<method name="_joint_make_groove" qualifiers="virtual">
@@ -761,6 +882,7 @@
 			<param index="4" name="body_a" type="RID" />
 			<param index="5" name="body_b" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.joint_make_groove].
 			</description>
 		</method>
 		<method name="_joint_make_pin" qualifiers="virtual">
@@ -770,6 +892,7 @@
 			<param index="2" name="body_a" type="RID" />
 			<param index="3" name="body_b" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.joint_make_pin].
 			</description>
 		</method>
 		<method name="_joint_set_param" qualifiers="virtual">
@@ -778,6 +901,7 @@
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.JointParam" />
 			<param index="2" name="value" type="float" />
 			<description>
+				Overridable version of [method PhysicsServer2D.joint_set_param].
 			</description>
 		</method>
 		<method name="_pin_joint_get_flag" qualifiers="virtual const">
@@ -785,6 +909,7 @@
 			<param index="0" name="joint" type="RID" />
 			<param index="1" name="flag" type="int" enum="PhysicsServer2D.PinJointFlag" />
 			<description>
+				Overridable version of [method PhysicsServer2D.pin_joint_get_flag].
 			</description>
 		</method>
 		<method name="_pin_joint_get_param" qualifiers="virtual const">
@@ -792,6 +917,7 @@
 			<param index="0" name="joint" type="RID" />
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.PinJointParam" />
 			<description>
+				Overridable version of [method PhysicsServer2D.pin_joint_get_param].
 			</description>
 		</method>
 		<method name="_pin_joint_set_flag" qualifiers="virtual">
@@ -800,6 +926,7 @@
 			<param index="1" name="flag" type="int" enum="PhysicsServer2D.PinJointFlag" />
 			<param index="2" name="enabled" type="bool" />
 			<description>
+				Overridable version of [method PhysicsServer2D.pin_joint_set_flag].
 			</description>
 		</method>
 		<method name="_pin_joint_set_param" qualifiers="virtual">
@@ -808,27 +935,32 @@
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.PinJointParam" />
 			<param index="2" name="value" type="float" />
 			<description>
+				Overridable version of [method PhysicsServer2D.pin_joint_set_param].
 			</description>
 		</method>
 		<method name="_rectangle_shape_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.rectangle_shape_create].
 			</description>
 		</method>
 		<method name="_segment_shape_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.segment_shape_create].
 			</description>
 		</method>
 		<method name="_separation_ray_shape_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.separation_ray_shape_create].
 			</description>
 		</method>
 		<method name="_set_active" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="active" type="bool" />
 			<description>
+				Overridable version of [method PhysicsServer2D.set_active].
 			</description>
 		</method>
 		<method name="_shape_collide" qualifiers="virtual">
@@ -843,24 +975,30 @@
 			<param index="7" name="result_max" type="int" />
 			<param index="8" name="result_count" type="int32_t*" />
 			<description>
+				Given two shapes and their parameters, should return [code]true[/code] if a collision between the two would occur, with additional details passed in [param results].
+				Overridable version of [PhysicsServer2D]'s internal [code]shape_collide[/code] method. Corresponds to [method PhysicsDirectSpaceState2D.collide_shape].
 			</description>
 		</method>
 		<method name="_shape_get_custom_solver_bias" qualifiers="virtual const">
 			<return type="float" />
 			<param index="0" name="shape" type="RID" />
 			<description>
+				Should return the custom solver bias of the given [param shape], which defines how much bodies are forced to separate on contact when this shape is involved.
+				Overridable version of [PhysicsServer2D]'s internal [code]shape_get_custom_solver_bias[/code] method. Corresponds to [member Shape2D.custom_solver_bias].
 			</description>
 		</method>
 		<method name="_shape_get_data" qualifiers="virtual const">
 			<return type="Variant" />
 			<param index="0" name="shape" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.shape_get_data].
 			</description>
 		</method>
 		<method name="_shape_get_type" qualifiers="virtual const">
 			<return type="int" enum="PhysicsServer2D.ShapeType" />
 			<param index="0" name="shape" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.shape_get_type].
 			</description>
 		</method>
 		<method name="_shape_set_custom_solver_bias" qualifiers="virtual">
@@ -868,6 +1006,8 @@
 			<param index="0" name="shape" type="RID" />
 			<param index="1" name="bias" type="float" />
 			<description>
+				Should set the custom solver bias for the given [param shape]. It defines how much bodies are forced to separate on contact.
+				Overridable version of [PhysicsServer2D]'s internal [code]shape_get_custom_solver_bias[/code] method. Corresponds to [member Shape2D.custom_solver_bias].
 			</description>
 		</method>
 		<method name="_shape_set_data" qualifiers="virtual">
@@ -875,29 +1015,36 @@
 			<param index="0" name="shape" type="RID" />
 			<param index="1" name="data" type="Variant" />
 			<description>
+				Overridable version of [method PhysicsServer2D.shape_set_data].
 			</description>
 		</method>
 		<method name="_space_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.space_create].
 			</description>
 		</method>
 		<method name="_space_get_contact_count" qualifiers="virtual const">
 			<return type="int" />
 			<param index="0" name="space" type="RID" />
 			<description>
+				Should return how many contacts have occurred during the last physics step in the given [param space]. See also [method _space_get_contacts] and [method _space_set_debug_contacts].
+				Overridable version of [PhysicsServer2D]'s internal [code]space_get_contact_count[/code] method.
 			</description>
 		</method>
 		<method name="_space_get_contacts" qualifiers="virtual const">
 			<return type="PackedVector2Array" />
 			<param index="0" name="space" type="RID" />
 			<description>
+				Should return the positions of all contacts that have occurred during the last physics step in the given [param space]. See also [method _space_get_contact_count] and [method _space_set_debug_contacts].
+				Overridable version of [PhysicsServer2D]'s internal [code]space_get_contacts[/code] method.
 			</description>
 		</method>
 		<method name="_space_get_direct_state" qualifiers="virtual">
 			<return type="PhysicsDirectSpaceState2D" />
 			<param index="0" name="space" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.space_get_direct_state].
 			</description>
 		</method>
 		<method name="_space_get_param" qualifiers="virtual const">
@@ -905,12 +1052,14 @@
 			<param index="0" name="space" type="RID" />
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.SpaceParameter" />
 			<description>
+				Overridable version of [method PhysicsServer2D.space_get_param].
 			</description>
 		</method>
 		<method name="_space_is_active" qualifiers="virtual const">
 			<return type="bool" />
 			<param index="0" name="space" type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.space_is_active].
 			</description>
 		</method>
 		<method name="_space_set_active" qualifiers="virtual">
@@ -918,6 +1067,7 @@
 			<param index="0" name="space" type="RID" />
 			<param index="1" name="active" type="bool" />
 			<description>
+				Overridable version of [method PhysicsServer2D.space_set_active].
 			</description>
 		</method>
 		<method name="_space_set_debug_contacts" qualifiers="virtual">
@@ -925,6 +1075,8 @@
 			<param index="0" name="space" type="RID" />
 			<param index="1" name="max_contacts" type="int" />
 			<description>
+				Used internally to allow the given [param space] to store contact points, up to [param max_contacts]. This is automatically set for the main [World2D]'s space when [member SceneTree.debug_collisions_hint] is [code]true[/code], or by checking "Visible Collision Shapes" in the editor. Only works in debug builds.
+				Overridable version of [PhysicsServer2D]'s internal [code]space_set_debug_contacts[/code] method.
 			</description>
 		</method>
 		<method name="_space_set_param" qualifiers="virtual">
@@ -933,34 +1085,42 @@
 			<param index="1" name="param" type="int" enum="PhysicsServer2D.SpaceParameter" />
 			<param index="2" name="value" type="float" />
 			<description>
+				Overridable version of [method PhysicsServer2D.space_set_param].
 			</description>
 		</method>
 		<method name="_step" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="step" type="float" />
 			<description>
+				Called every physics step to process the physics simulation. [param step] is the time elapsed since the last physics step, in seconds. It is usually the same as [method Node.get_physics_process_delta_time].
+				Overridable version of [PhysicsServer2D]'s internal [code skip-lint]step[/code] method.
 			</description>
 		</method>
 		<method name="_sync" qualifiers="virtual">
 			<return type="void" />
 			<description>
+				Called to indicate that the physics server is synchronizing and cannot access physics states if running on a separate thread. See also [method _end_sync].
+				Overridable version of [PhysicsServer2D]'s internal [code]sync[/code] method.
 			</description>
 		</method>
 		<method name="_world_boundary_shape_create" qualifiers="virtual">
 			<return type="RID" />
 			<description>
+				Overridable version of [method PhysicsServer2D.world_boundary_shape_create].
 			</description>
 		</method>
 		<method name="body_test_motion_is_excluding_body" qualifiers="const">
 			<return type="bool" />
 			<param index="0" name="body" type="RID" />
 			<description>
+				Returns [code]true[/code] if the body with the given [RID] is being excluded from [method _body_test_motion]. See also [method Object.get_instance_id].
 			</description>
 		</method>
 		<method name="body_test_motion_is_excluding_object" qualifiers="const">
 			<return type="bool" />
 			<param index="0" name="object" type="int" />
 			<description>
+				Returns [code]true[/code] if the object with the given instance ID is being excluded from [method _body_test_motion]. See also [method Object.get_instance_id].
 			</description>
 		</method>
 	</methods>
diff --git a/doc/classes/PhysicsServer3D.xml b/doc/classes/PhysicsServer3D.xml
index e40d73862b..4a4a1ad025 100644
--- a/doc/classes/PhysicsServer3D.xml
+++ b/doc/classes/PhysicsServer3D.xml
@@ -482,7 +482,7 @@
 			<return type="bool" />
 			<param index="0" name="body" type="RID" />
 			<description>
-				Returns whether a body uses a callback function to calculate its own physics (see [method body_set_force_integration_callback]).
+				Returns [code]true[/code] if the body is omitting the standard force integration. See [method body_set_omit_force_integration].
 			</description>
 		</method>
 		<method name="body_remove_collision_exception">
@@ -582,9 +582,12 @@
 			<param index="1" name="callable" type="Callable" />
 			<param index="2" name="userdata" type="Variant" default="null" />
 			<description>
-				Sets the function used to calculate physics for an object, if that object allows it (see [method body_set_omit_force_integration]). The force integration function takes 2 arguments:
-				- [code]state[/code] — [PhysicsDirectBodyState3D] used to retrieve and modify the body's state.
-				- [code skip-lint]userdata[/code] — optional user data passed to [method body_set_force_integration_callback].
+				Sets the body's custom force integration callback function to [param callable]. Use an empty [Callable] ([code skip-lint]Callable()[/code]) to clear the custom callback.
+				The function [param callable] will be called every physics tick, before the standard force integration (see [method body_set_omit_force_integration]). It can be used for example to update the body's linear and angular velocity based on contact with other bodies.
+				If [param userdata] is not [code]null[/code], the function [param callable] must take the following two parameters:
+				1. [code]state[/code]: a [PhysicsDirectBodyState3D], used to retrieve and modify the body's state,
+				2. [code skip-lint]userdata[/code]: a [Variant]; its value will be the [param userdata] passed into this method.
+				If [param userdata] is [code]null[/code], then [param callable] must take only the [code]state[/code] parameter.
 			</description>
 		</method>
 		<method name="body_set_max_contacts_reported">
@@ -608,7 +611,8 @@
 			<param index="0" name="body" type="RID" />
 			<param index="1" name="enable" type="bool" />
 			<description>
-				Sets whether a body uses a callback function to calculate its own physics (see [method body_set_force_integration_callback]).
+				Sets whether the body omits the standard force integration. If [param enable] is [code]true[/code], the body will not automatically use applied forces, torques, and damping to update the body's linear and angular velocity. In this case, [method body_set_force_integration_callback] can be used to manually update the linear and angular velocity instead.
+				This method is called when the property [member RigidBody3D.custom_integrator] is set.
 			</description>
 		</method>
 		<method name="body_set_param">
@@ -1624,7 +1628,7 @@
 			Constant to set/get the priority (order of processing) of an area.
 		</constant>
 		<constant name="AREA_PARAM_WIND_FORCE_MAGNITUDE" value="10" enum="AreaParameter">
-			Constant to set/get the magnitude of area-specific wind force.
+			Constant to set/get the magnitude of area-specific wind force. This wind force only applies to [SoftBody3D] nodes. Other physics bodies are currently not affected by wind.
 		</constant>
 		<constant name="AREA_PARAM_WIND_SOURCE" value="11" enum="AreaParameter">
 			Constant to set/get the 3D vector that specifies the origin from which an area-specific wind blows.
diff --git a/doc/classes/Polygon2D.xml b/doc/classes/Polygon2D.xml
index 029a69a399..90d3522002 100644
--- a/doc/classes/Polygon2D.xml
+++ b/doc/classes/Polygon2D.xml
@@ -91,7 +91,6 @@
 		</member>
 		<member name="polygon" type="PackedVector2Array" setter="set_polygon" getter="get_polygon" default="PackedVector2Array()">
 			The polygon's list of vertices. The final point will be connected to the first.
-			[b]Note:[/b] This returns a copy of the [PackedVector2Array] rather than a reference.
 		</member>
 		<member name="polygons" type="Array" setter="set_polygons" getter="get_polygons" default="[]">
 			The list of polygons, in case more than one is being represented. Every individual polygon is stored as a [PackedInt32Array] where each [int] is an index to a point in [member polygon]. If empty, this property will be ignored, and the resulting single polygon will be composed of all points in [member polygon], using the order they are stored in.
diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml
index 6a6f53ac60..a5aeee5bc4 100644
--- a/doc/classes/ProjectSettings.xml
+++ b/doc/classes/ProjectSettings.xml
@@ -10,9 +10,9 @@
 		[b]Overriding:[/b] Any project setting can be overridden by creating a file named [code]override.cfg[/code] in the project's root directory. This can also be used in exported projects by placing this file in the same directory as the project binary. Overriding will still take the base project settings' [url=$DOCS_URL/tutorials/export/feature_tags.html]feature tags[/url] in account. Therefore, make sure to [i]also[/i] override the setting with the desired feature tags if you want them to override base project settings on all platforms and configurations.
 	</description>
 	<tutorials>
-		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="OS Test Demo">https://godotengine.org/asset-library/asset/677</link>
+		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/2747</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="Operating System Testing Demo">https://godotengine.org/asset-library/asset/2789</link>
 	</tutorials>
 	<methods>
 		<method name="add_property_info">
@@ -323,6 +323,11 @@
 			If [code]true[/code], disables printing to standard output. This is equivalent to starting the editor or project with the [code]--quiet[/code] [url=$DOCS_URL/tutorials/editor/command_line_tutorial.html]command line argument[/url]. See also [member application/run/disable_stderr].
 			Changes to this setting will only be applied upon restarting the application.
 		</member>
+		<member name="application/run/enable_alt_space_menu" type="bool" setter="" getter="" default="false">
+			If [code]true[/code], allows the [kbd]Alt + Space[/kbd] keys to display the window menu. This menu allows the user to perform various window management operations such as moving, resizing, or minimizing the window.
+			[b]Note:[/b] When the menu is displayed, project execution will pause until the menu is [i]fully[/i] closed due to Windows behavior. Consider this when enabling this setting in a networked multiplayer game. The menu is only considered fully closed when an option is selected, when the user clicks outside, or when [kbd]Escape[/kbd] is pressed after bringing up the window menu [i]and[/i] another key is pressed afterwards.
+			[b]Note:[/b] This setting is implemented only on Windows.
+		</member>
 		<member name="application/run/flush_stdout_on_print" type="bool" setter="" getter="" default="false">
 			If [code]true[/code], flushes the standard output stream every time a line is printed. This affects both terminal logging and file logging.
 			When running a project, this setting must be enabled if you want logs to be collected by service managers such as systemd/journalctl. This setting is disabled by default on release builds, since flushing on every printed line will negatively affect performance if lots of lines are printed in a rapid succession. Also, if this setting is enabled, logged files will still be written successfully if the application crashes or is otherwise killed by the user (without being closed "normally").
@@ -569,7 +574,7 @@
 			When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when using an expression whose type may not be compatible with the function parameter expected.
 		</member>
 		<member name="debug/gdscript/warnings/unsafe_cast" type="int" setter="" getter="" default="0">
-			When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when performing an unsafe cast.
+			When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when a [Variant] value is cast to a non-Variant.
 		</member>
 		<member name="debug/gdscript/warnings/unsafe_method_access" type="int" setter="" getter="" default="0">
 			When set to [code]warn[/code] or [code]error[/code], produces a warning or an error respectively when calling a method whose presence is not guaranteed at compile-time in the class.
@@ -964,7 +969,7 @@
 			The command-line arguments to append to Godot's own command line when running the project. This doesn't affect the editor itself.
 			It is possible to make another executable run Godot by using the [code]%command%[/code] placeholder. The placeholder will be replaced with Godot's own command line. Program-specific arguments should be placed [i]before[/i] the placeholder, whereas Godot-specific arguments should be placed [i]after[/i] the placeholder.
 			For example, this can be used to force the project to run on the dedicated GPU in an NVIDIA Optimus system on Linux:
-			[codeblock]
+			[codeblock lang=text]
 			prime-run %command%
 			[/codeblock]
 		</member>
@@ -2850,7 +2855,7 @@
 		<member name="rendering/vrs/texture" type="String" setter="" getter="" default="&quot;&quot;">
 			If [member rendering/vrs/mode] is set to [b]Texture[/b], this is the path to default texture loaded as the VRS image.
 			The texture [i]must[/i] use a lossless compression format so that colors can be matched precisely. The following VRS densities are mapped to various colors, with brighter colors representing a lower level of shading precision:
-			[codeblock]
+			[codeblock lang=text]
 			- 1×1 = rgb(0, 0, 0)     - #000000
 			- 1×2 = rgb(0, 85, 0)    - #005500
 			- 2×1 = rgb(85, 0, 0)    - #550000
diff --git a/doc/classes/QuadMesh.xml b/doc/classes/QuadMesh.xml
index c5164e9feb..d840616fdd 100644
--- a/doc/classes/QuadMesh.xml
+++ b/doc/classes/QuadMesh.xml
@@ -7,8 +7,8 @@
 		Class representing a square [PrimitiveMesh]. This flat mesh does not have a thickness. By default, this mesh is aligned on the X and Y axes; this rotation is more suited for use with billboarded materials. A [QuadMesh] is equivalent to a [PlaneMesh] except its default [member PlaneMesh.orientation] is [constant PlaneMesh.FACE_Z].
 	</description>
 	<tutorials>
-		<link title="GUI in 3D Demo">https://godotengine.org/asset-library/asset/127</link>
-		<link title="2D in 3D Demo">https://godotengine.org/asset-library/asset/129</link>
+		<link title="GUI in 3D Viewport Demo">https://godotengine.org/asset-library/asset/2807</link>
+		<link title="2D in 3D Viewport Demo">https://godotengine.org/asset-library/asset/2803</link>
 	</tutorials>
 	<members>
 		<member name="orientation" type="int" setter="set_orientation" getter="get_orientation" overrides="PlaneMesh" enum="PlaneMesh.Orientation" default="2" />
diff --git a/doc/classes/Quaternion.xml b/doc/classes/Quaternion.xml
index 8b59448555..665c6335f2 100644
--- a/doc/classes/Quaternion.xml
+++ b/doc/classes/Quaternion.xml
@@ -14,7 +14,7 @@
 		<link title="3Blue1Brown&apos;s video on Quaternions">https://www.youtube.com/watch?v=d4EgbgTm0Bg</link>
 		<link title="Online Quaternion Visualization">https://quaternions.online/</link>
 		<link title="Using 3D transforms">$DOCS_URL/tutorials/3d/using_transforms.html#interpolating-with-quaternions</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 		<link title="Advanced Quaternion Visualization">https://iwatake2222.github.io/rotation_master/rotation_master.html</link>
 	</tutorials>
 	<constructors>
diff --git a/doc/classes/RayCast2D.xml b/doc/classes/RayCast2D.xml
index 31daaab417..16643b0a71 100644
--- a/doc/classes/RayCast2D.xml
+++ b/doc/classes/RayCast2D.xml
@@ -56,6 +56,21 @@
 			<return type="int" />
 			<description>
 				Returns the shape ID of the first object that the ray intersects, or [code]0[/code] if no object is intersecting the ray (i.e. [method is_colliding] returns [code]false[/code]).
+				To get the intersected shape node, for a [CollisionObject2D] target, use:
+				[codeblocks]
+				[gdscript]
+				var target = get_collider() # A CollisionObject2D.
+				var shape_id = get_collider_shape() # The shape index in the collider.
+				var owner_id = target.shape_find_owner(shape_id) # The owner ID in the collider.
+				var shape = target.shape_owner_get_owner(owner_id)
+				[/gdscript]
+				[csharp]
+				var target = (CollisionObject2D)GetCollider(); // A CollisionObject2D.
+				var shapeId = GetColliderShape(); // The shape index in the collider.
+				var ownerId = target.ShapeFindOwner(shapeId); // The owner ID in the collider.
+				var shape = target.ShapeOwnerGetOwner(ownerId);
+				[/csharp]
+				[/codeblocks]
 			</description>
 		</method>
 		<method name="get_collision_mask_value" qualifiers="const">
diff --git a/doc/classes/RayCast3D.xml b/doc/classes/RayCast3D.xml
index f9f94e5cfc..18a544d114 100644
--- a/doc/classes/RayCast3D.xml
+++ b/doc/classes/RayCast3D.xml
@@ -11,7 +11,7 @@
 	</description>
 	<tutorials>
 		<link title="Ray-casting">$DOCS_URL/tutorials/physics/ray-casting.html</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<methods>
 		<method name="add_exception">
@@ -57,6 +57,21 @@
 			<return type="int" />
 			<description>
 				Returns the shape ID of the first object that the ray intersects, or [code]0[/code] if no object is intersecting the ray (i.e. [method is_colliding] returns [code]false[/code]).
+				To get the intersected shape node, for a [CollisionObject3D] target, use:
+				[codeblocks]
+				[gdscript]
+				var target = get_collider() # A CollisionObject3D.
+				var shape_id = get_collider_shape() # The shape index in the collider.
+				var owner_id = target.shape_find_owner(shape_id) # The owner ID in the collider.
+				var shape = target.shape_owner_get_owner(owner_id)
+				[/gdscript]
+				[csharp]
+				var target = (CollisionObject3D)GetCollider(); // A CollisionObject3D.
+				var shapeId = GetColliderShape(); // The shape index in the collider.
+				var ownerId = target.ShapeFindOwner(shapeId); // The owner ID in the collider.
+				var shape = target.ShapeOwnerGetOwner(ownerId);
+				[/csharp]
+				[/codeblocks]
 			</description>
 		</method>
 		<method name="get_collision_face_index" qualifiers="const">
diff --git a/doc/classes/RectangleShape2D.xml b/doc/classes/RectangleShape2D.xml
index 721d73dc14..a62b997a0a 100644
--- a/doc/classes/RectangleShape2D.xml
+++ b/doc/classes/RectangleShape2D.xml
@@ -8,8 +8,8 @@
 		[b]Performance:[/b] [RectangleShape2D] is fast to check collisions against. It is faster than [CapsuleShape2D], but slower than [CircleShape2D].
 	</description>
 	<tutorials>
-		<link title="2D Pong Demo">https://godotengine.org/asset-library/asset/121</link>
-		<link title="2D Kinematic Character Demo">https://godotengine.org/asset-library/asset/113</link>
+		<link title="2D Pong Demo">https://godotengine.org/asset-library/asset/2728</link>
+		<link title="2D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2719</link>
 	</tutorials>
 	<members>
 		<member name="size" type="Vector2" setter="set_size" getter="get_size" default="Vector2(20, 20)">
diff --git a/doc/classes/RenderingDevice.xml b/doc/classes/RenderingDevice.xml
index 33cd831175..3c1061dee9 100644
--- a/doc/classes/RenderingDevice.xml
+++ b/doc/classes/RenderingDevice.xml
@@ -138,6 +138,15 @@
 				Submits the compute list for processing on the GPU. This is the compute equivalent to [method draw_list_draw].
 			</description>
 		</method>
+		<method name="compute_list_dispatch_indirect">
+			<return type="void" />
+			<param index="0" name="compute_list" type="int" />
+			<param index="1" name="buffer" type="RID" />
+			<param index="2" name="offset" type="int" />
+			<description>
+				Submits the compute list for processing on the GPU with the given group counts stored in the [param buffer] at [param offset]. Buffer must have been created with [constant STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT] flag.
+			</description>
+		</method>
 		<method name="compute_list_end">
 			<return type="void" />
 			<description>
@@ -2129,8 +2138,10 @@
 			Represents the size of the [enum BlendOperation] enum.
 		</constant>
 		<constant name="DYNAMIC_STATE_LINE_WIDTH" value="1" enum="PipelineDynamicStateFlags" is_bitfield="true">
+			Allows dynamically changing the width of rendering lines.
 		</constant>
 		<constant name="DYNAMIC_STATE_DEPTH_BIAS" value="2" enum="PipelineDynamicStateFlags" is_bitfield="true">
+			Allows dynamically changing the depth bias.
 		</constant>
 		<constant name="DYNAMIC_STATE_BLEND_CONSTANTS" value="4" enum="PipelineDynamicStateFlags" is_bitfield="true">
 		</constant>
diff --git a/doc/classes/RenderingServer.xml b/doc/classes/RenderingServer.xml
index c74d61ab88..d7a659c255 100644
--- a/doc/classes/RenderingServer.xml
+++ b/doc/classes/RenderingServer.xml
@@ -1932,7 +1932,7 @@
 			<param index="0" name="instance" type="RID" />
 			<param index="1" name="transform" type="Transform3D" />
 			<description>
-				Sets the world space transform of the instance. Equivalent to [member Node3D.transform].
+				Sets the world space transform of the instance. Equivalent to [member Node3D.global_transform].
 			</description>
 		</method>
 		<method name="instance_set_visibility_parent">
@@ -2576,7 +2576,7 @@
 			<description>
 				Set the entire data to use for drawing the [param multimesh] at once to [param buffer] (such as instance transforms and colors). [param buffer]'s size must match the number of instances multiplied by the per-instance data size (which depends on the enabled MultiMesh fields). Otherwise, an error message is printed and nothing is rendered. See also [method multimesh_get_buffer].
 				The per-instance data size and expected data order is:
-				[codeblock]
+				[codeblock lang=text]
 				2D:
 				  - Position: 8 floats (8 floats for Transform2D)
 				  - Position + Vertex color: 12 floats (8 floats for Transform2D, 4 floats for Color)
diff --git a/doc/classes/ResourceImporterCSVTranslation.xml b/doc/classes/ResourceImporterCSVTranslation.xml
index 122728e75a..578a79fdca 100644
--- a/doc/classes/ResourceImporterCSVTranslation.xml
+++ b/doc/classes/ResourceImporterCSVTranslation.xml
@@ -6,7 +6,7 @@
 	<description>
 		Comma-separated values are a plain text table storage format. The format's simplicity makes it easy to edit in any text editor or spreadsheet software. This makes it a common choice for game localization.
 		[b]Example CSV file:[/b]
-		[codeblock]
+		[codeblock lang=text]
 		keys,en,es,ja
 		GREET,"Hello, friend!","Hola, amigo!",こんにちは
 		ASK,How are you?,Cómo está?,元気ですか
diff --git a/doc/classes/ResourceLoader.xml b/doc/classes/ResourceLoader.xml
index 95fbee4a24..885c6f0478 100644
--- a/doc/classes/ResourceLoader.xml
+++ b/doc/classes/ResourceLoader.xml
@@ -9,7 +9,7 @@
 		[b]Note:[/b] You have to import the files into the engine first to load them using [method load]. If you want to load [Image]s at run-time, you may use [method Image.load]. If you want to import audio files, you can use the snippet described in [member AudioStreamMP3.data].
 	</description>
 	<tutorials>
-		<link title="OS Test Demo">https://godotengine.org/asset-library/asset/677</link>
+		<link title="Operating System Testing Demo">https://godotengine.org/asset-library/asset/2789</link>
 	</tutorials>
 	<methods>
 		<method name="add_resource_format_loader">
@@ -79,6 +79,7 @@
 				Returns an empty resource if no [ResourceFormatLoader] could handle the file.
 				GDScript has a simplified [method @GDScript.load] built-in method which can be used in most situations, leaving the use of [ResourceLoader] for more advanced scenarios.
 				[b]Note:[/b] If [member ProjectSettings.editor/export/convert_text_resources_to_binary] is [code]true[/code], [method @GDScript.load] will not be able to read converted files in an exported project. If you rely on run-time loading of files present within the PCK, set [member ProjectSettings.editor/export/convert_text_resources_to_binary] to [code]false[/code].
+				[b]Note:[/b] Relative paths will be prefixed with [code]"res://"[/code] before loading, to avoid unexpected results make sure your paths are absolute.
 			</description>
 		</method>
 		<method name="load_threaded_get">
diff --git a/doc/classes/RichTextLabel.xml b/doc/classes/RichTextLabel.xml
index b9a6b06fe3..01c4074e6d 100644
--- a/doc/classes/RichTextLabel.xml
+++ b/doc/classes/RichTextLabel.xml
@@ -12,8 +12,8 @@
 	</description>
 	<tutorials>
 		<link title="BBCode in RichTextLabel">$DOCS_URL/tutorials/ui/bbcode_in_richtextlabel.html</link>
-		<link title="GUI Rich Text/BBcode Demo">https://godotengine.org/asset-library/asset/132</link>
-		<link title="OS Test Demo">https://godotengine.org/asset-library/asset/677</link>
+		<link title="Rich Text Label with BBCode Demo">https://godotengine.org/asset-library/asset/2774</link>
+		<link title="Operating System Testing Demo">https://godotengine.org/asset-library/asset/2789</link>
 	</tutorials>
 	<methods>
 		<method name="add_image">
diff --git a/doc/classes/RigidBody2D.xml b/doc/classes/RigidBody2D.xml
index 269ead1298..5661d1a276 100644
--- a/doc/classes/RigidBody2D.xml
+++ b/doc/classes/RigidBody2D.xml
@@ -11,15 +11,15 @@
 		[b]Note:[/b] Changing the 2D transform or [member linear_velocity] of a [RigidBody2D] very often may lead to some unpredictable behaviors. If you need to directly affect the body, prefer [method _integrate_forces] as it allows you to directly access the physics state.
 	</description>
 	<tutorials>
-		<link title="2D Physics Platformer Demo">https://godotengine.org/asset-library/asset/119</link>
-		<link title="Instancing Demo">https://godotengine.org/asset-library/asset/148</link>
+		<link title="2D Physics Platformer Demo">https://godotengine.org/asset-library/asset/2725</link>
+		<link title="Instancing Demo">https://godotengine.org/asset-library/asset/2716</link>
 	</tutorials>
 	<methods>
 		<method name="_integrate_forces" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="state" type="PhysicsDirectBodyState2D" />
 			<description>
-				Allows you to read and safely modify the simulation state for the object. Use this instead of [method Node._physics_process] if you need to directly change the body's [code]position[/code] or other physics properties. By default, it works in addition to the usual physics behavior, but [member custom_integrator] allows you to disable the default behavior and write custom force integration for a body.
+				Called during physics processing, allowing you to read and safely modify the simulation state for the object. By default, it is called before the standard force integration, but the [member custom_integrator] property allows you to disable the standard force integration and do fully custom force integration for a body.
 			</description>
 		</method>
 		<method name="add_constant_central_force">
@@ -159,7 +159,8 @@
 			Continuous collision detection tries to predict where a moving body will collide instead of moving it and correcting its movement after collision. Continuous collision detection is slower, but more precise and misses fewer collisions with small, fast-moving objects. Raycasting and shapecasting methods are available. See [enum CCDMode] for details.
 		</member>
 		<member name="custom_integrator" type="bool" setter="set_use_custom_integrator" getter="is_using_custom_integrator" default="false">
-			If [code]true[/code], internal force integration is disabled for this body. Aside from collision response, the body will only move as determined by the [method _integrate_forces] function.
+			If [code]true[/code], the standard force integration (like gravity or damping) will be disabled for this body. Other than collision response, the body will only move as determined by the [method _integrate_forces] method, if that virtual method is overridden.
+			Setting this property will call the method [method PhysicsServer2D.body_set_omit_force_integration] internally.
 		</member>
 		<member name="freeze" type="bool" setter="set_freeze_enabled" getter="is_freeze_enabled" default="false">
 			If [code]true[/code], the body is frozen. Gravity and forces are not applied anymore.
diff --git a/doc/classes/RigidBody3D.xml b/doc/classes/RigidBody3D.xml
index c507a7c39a..dae904e2a3 100644
--- a/doc/classes/RigidBody3D.xml
+++ b/doc/classes/RigidBody3D.xml
@@ -12,15 +12,15 @@
 	</description>
 	<tutorials>
 		<link title="Physics introduction">$DOCS_URL/tutorials/physics/physics_introduction.html</link>
-		<link title="3D Truck Town Demo">https://godotengine.org/asset-library/asset/524</link>
-		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
+		<link title="3D Truck Town Demo">https://godotengine.org/asset-library/asset/2752</link>
+		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/2747</link>
 	</tutorials>
 	<methods>
 		<method name="_integrate_forces" qualifiers="virtual">
 			<return type="void" />
 			<param index="0" name="state" type="PhysicsDirectBodyState3D" />
 			<description>
-				Called during physics processing, allowing you to read and safely modify the simulation state for the object. By default, it works in addition to the usual physics behavior, but the [member custom_integrator] property allows you to disable the default behavior and do fully custom force integration for a body.
+				Called during physics processing, allowing you to read and safely modify the simulation state for the object. By default, it is called before the standard force integration, but the [member custom_integrator] property allows you to disable the standard force integration and do fully custom force integration for a body.
 			</description>
 		</method>
 		<method name="add_constant_central_force">
@@ -166,7 +166,8 @@
 			Continuous collision detection tries to predict where a moving body will collide, instead of moving it and correcting its movement if it collided. Continuous collision detection is more precise, and misses fewer impacts by small, fast-moving objects. Not using continuous collision detection is faster to compute, but can miss small, fast-moving objects.
 		</member>
 		<member name="custom_integrator" type="bool" setter="set_use_custom_integrator" getter="is_using_custom_integrator" default="false">
-			If [code]true[/code], internal force integration will be disabled (like gravity or air friction) for this body. Other than collision response, the body will only move as determined by the [method _integrate_forces] function, if defined.
+			If [code]true[/code], the standard force integration (like gravity or damping) will be disabled for this body. Other than collision response, the body will only move as determined by the [method _integrate_forces] method, if that virtual method is overridden.
+			Setting this property will call the method [method PhysicsServer3D.body_set_omit_force_integration] internally.
 		</member>
 		<member name="freeze" type="bool" setter="set_freeze_enabled" getter="is_freeze_enabled" default="false">
 			If [code]true[/code], the body is frozen. Gravity and forces are not applied anymore.
diff --git a/doc/classes/ShapeCast2D.xml b/doc/classes/ShapeCast2D.xml
index d71c9ce13a..576bd62cc3 100644
--- a/doc/classes/ShapeCast2D.xml
+++ b/doc/classes/ShapeCast2D.xml
@@ -48,6 +48,7 @@
 			<return type="float" />
 			<description>
 				The fraction from the [ShapeCast2D]'s origin to its [member target_position] (between 0 and 1) of how far the shape must move to trigger a collision.
+				In ideal conditions this would be the same as [method get_closest_collision_safe_fraction], however shape casting is calculated in discrete steps, so the precise point of collision can occur between two calculated positions.
 			</description>
 		</method>
 		<method name="get_collider" qualifiers="const">
diff --git a/doc/classes/ShapeCast3D.xml b/doc/classes/ShapeCast3D.xml
index ff057e8c70..2c6efe2ebe 100644
--- a/doc/classes/ShapeCast3D.xml
+++ b/doc/classes/ShapeCast3D.xml
@@ -48,6 +48,7 @@
 			<return type="float" />
 			<description>
 				The fraction from the [ShapeCast3D]'s origin to its [member target_position] (between 0 and 1) of how far the shape must move to trigger a collision.
+				In ideal conditions this would be the same as [method get_closest_collision_safe_fraction], however shape casting is calculated in discrete steps, so the precise point of collision can occur between two calculated positions.
 			</description>
 		</method>
 		<method name="get_collider" qualifiers="const">
diff --git a/doc/classes/Skeleton3D.xml b/doc/classes/Skeleton3D.xml
index 17c93af652..caa3097a6b 100644
--- a/doc/classes/Skeleton3D.xml
+++ b/doc/classes/Skeleton3D.xml
@@ -9,8 +9,7 @@
 		Note that "global pose" below refers to the overall transform of the bone with respect to skeleton, so it is not the actual global/world transform of the bone.
 	</description>
 	<tutorials>
-		<link title="3D Inverse Kinematics Demo">https://godotengine.org/asset-library/asset/523</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="add_bone">
diff --git a/doc/classes/SkeletonIK3D.xml b/doc/classes/SkeletonIK3D.xml
index 46b3d79795..6de6d9d186 100644
--- a/doc/classes/SkeletonIK3D.xml
+++ b/doc/classes/SkeletonIK3D.xml
@@ -26,7 +26,6 @@
 		[/codeblock]
 	</description>
 	<tutorials>
-		<link title="3D Inverse Kinematics Demo">https://godotengine.org/asset-library/asset/523</link>
 	</tutorials>
 	<methods>
 		<method name="get_parent_skeleton" qualifiers="const">
diff --git a/doc/classes/SkeletonProfileHumanoid.xml b/doc/classes/SkeletonProfileHumanoid.xml
index 5539d1d980..9c9cc2d8c7 100644
--- a/doc/classes/SkeletonProfileHumanoid.xml
+++ b/doc/classes/SkeletonProfileHumanoid.xml
@@ -6,7 +6,7 @@
 	<description>
 		A [SkeletonProfile] as a preset that is optimized for the human form. This exists for standardization, so all parameters are read-only.
 		A humanoid skeleton profile contains 54 bones divided in 4 groups: [code]"Body"[/code], [code]"Face"[/code], [code]"LeftHand"[/code], and [code]"RightHand"[/code]. It is structured as follows:
-		[codeblock]
+		[codeblock lang=text]
 		Root
 		└─ Hips
 		    ├─ LeftUpperLeg
diff --git a/doc/classes/SkinReference.xml b/doc/classes/SkinReference.xml
index 466dbe2500..cb0c44cefa 100644
--- a/doc/classes/SkinReference.xml
+++ b/doc/classes/SkinReference.xml
@@ -1,8 +1,14 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <class name="SkinReference" inherits="RefCounted" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../class.xsd">
 	<brief_description>
+		A reference-counted holder object for a skeleton RID used in the [RenderingServer].
 	</brief_description>
 	<description>
+		An internal object containing a mapping from a [Skin] used within the context of a particular [MeshInstance3D] to refer to the skeleton's [RID] in the RenderingServer.
+		See also [method MeshInstance3D.get_skin_reference] and [method RenderingServer.instance_attach_skeleton].
+		Note that despite the similar naming, the skeleton RID used in the [RenderingServer] does not have a direct one-to-one correspondence to a [Skeleton3D] node.
+		In particular, a [Skeleton3D] node with no [MeshInstance3D] children may be unknown to the [RenderingServer].
+		On the other hand, a [Skeleton3D] with multiple [MeshInstance3D] nodes which each have different [member MeshInstance3D.skin] objects may have multiple SkinReference instances (and hence, multiple skeleton [RID]s).
 	</description>
 	<tutorials>
 	</tutorials>
@@ -10,11 +16,14 @@
 		<method name="get_skeleton" qualifiers="const">
 			<return type="RID" />
 			<description>
+				Returns the [RID] owned by this SkinReference, as returned by [method RenderingServer.skeleton_create].
 			</description>
 		</method>
 		<method name="get_skin" qualifiers="const">
 			<return type="Skin" />
 			<description>
+				Returns the [Skin] connected to this SkinReference. In the case of [MeshInstance3D] with no [member MeshInstance3D.skin] assigned, this will reference an internal default [Skin] owned by that [MeshInstance3D].
+				Note that a single [Skin] may have more than one [SkinReference] in the case that it is shared by meshes across multiple [Skeleton3D] nodes.
 			</description>
 		</method>
 	</methods>
diff --git a/doc/classes/SliderJoint3D.xml b/doc/classes/SliderJoint3D.xml
index 49b362041b..8930514492 100644
--- a/doc/classes/SliderJoint3D.xml
+++ b/doc/classes/SliderJoint3D.xml
@@ -13,6 +13,7 @@
 			<return type="float" />
 			<param index="0" name="param" type="int" enum="SliderJoint3D.Param" />
 			<description>
+				Returns the value of the given parameter (see [enum Param] constants).
 			</description>
 		</method>
 		<method name="set_param">
@@ -20,6 +21,7 @@
 			<param index="0" name="param" type="int" enum="SliderJoint3D.Param" />
 			<param index="1" name="value" type="float" />
 			<description>
+				Assigns [param value] to the given parameter (see [enum Param] constants).
 			</description>
 		</method>
 	</methods>
@@ -96,70 +98,70 @@
 	</members>
 	<constants>
 		<constant name="PARAM_LINEAR_LIMIT_UPPER" value="0" enum="Param">
-			The maximum difference between the pivot points on their X axis before damping happens.
+			Constant for accessing [member linear_limit/upper_distance]. The maximum difference between the pivot points on their X axis before damping happens.
 		</constant>
 		<constant name="PARAM_LINEAR_LIMIT_LOWER" value="1" enum="Param">
-			The minimum difference between the pivot points on their X axis before damping happens.
+			Constant for accessing [member linear_limit/lower_distance]. The minimum difference between the pivot points on their X axis before damping happens.
 		</constant>
 		<constant name="PARAM_LINEAR_LIMIT_SOFTNESS" value="2" enum="Param">
-			A factor applied to the movement across the slider axis once the limits get surpassed. The lower, the slower the movement.
+			Constant for accessing [member linear_limit/softness]. A factor applied to the movement across the slider axis once the limits get surpassed. The lower, the slower the movement.
 		</constant>
 		<constant name="PARAM_LINEAR_LIMIT_RESTITUTION" value="3" enum="Param">
-			The amount of restitution once the limits are surpassed. The lower, the more velocity-energy gets lost.
+			Constant for accessing [member linear_limit/restitution]. The amount of restitution once the limits are surpassed. The lower, the more velocity-energy gets lost.
 		</constant>
 		<constant name="PARAM_LINEAR_LIMIT_DAMPING" value="4" enum="Param">
-			The amount of damping once the slider limits are surpassed.
+			Constant for accessing [member linear_limit/damping]. The amount of damping once the slider limits are surpassed.
 		</constant>
 		<constant name="PARAM_LINEAR_MOTION_SOFTNESS" value="5" enum="Param">
-			A factor applied to the movement across the slider axis as long as the slider is in the limits. The lower, the slower the movement.
+			Constant for accessing [member linear_motion/softness]. A factor applied to the movement across the slider axis as long as the slider is in the limits. The lower, the slower the movement.
 		</constant>
 		<constant name="PARAM_LINEAR_MOTION_RESTITUTION" value="6" enum="Param">
-			The amount of restitution inside the slider limits.
+			Constant for accessing [member linear_motion/restitution]. The amount of restitution inside the slider limits.
 		</constant>
 		<constant name="PARAM_LINEAR_MOTION_DAMPING" value="7" enum="Param">
-			The amount of damping inside the slider limits.
+			Constant for accessing [member linear_motion/damping]. The amount of damping inside the slider limits.
 		</constant>
 		<constant name="PARAM_LINEAR_ORTHOGONAL_SOFTNESS" value="8" enum="Param">
-			A factor applied to the movement across axes orthogonal to the slider.
+			Constant for accessing [member linear_ortho/softness]. A factor applied to the movement across axes orthogonal to the slider.
 		</constant>
 		<constant name="PARAM_LINEAR_ORTHOGONAL_RESTITUTION" value="9" enum="Param">
-			The amount of restitution when movement is across axes orthogonal to the slider.
+			Constant for accessing [member linear_motion/restitution]. The amount of restitution when movement is across axes orthogonal to the slider.
 		</constant>
 		<constant name="PARAM_LINEAR_ORTHOGONAL_DAMPING" value="10" enum="Param">
-			The amount of damping when movement is across axes orthogonal to the slider.
+			Constant for accessing [member linear_motion/damping]. The amount of damping when movement is across axes orthogonal to the slider.
 		</constant>
 		<constant name="PARAM_ANGULAR_LIMIT_UPPER" value="11" enum="Param">
-			The upper limit of rotation in the slider.
+			Constant for accessing [member angular_limit/upper_angle]. The upper limit of rotation in the slider.
 		</constant>
 		<constant name="PARAM_ANGULAR_LIMIT_LOWER" value="12" enum="Param">
-			The lower limit of rotation in the slider.
+			Constant for accessing [member angular_limit/lower_angle]. The lower limit of rotation in the slider.
 		</constant>
 		<constant name="PARAM_ANGULAR_LIMIT_SOFTNESS" value="13" enum="Param">
-			A factor applied to the all rotation once the limit is surpassed.
+			Constant for accessing [member angular_limit/softness]. A factor applied to the all rotation once the limit is surpassed.
 		</constant>
 		<constant name="PARAM_ANGULAR_LIMIT_RESTITUTION" value="14" enum="Param">
-			The amount of restitution of the rotation when the limit is surpassed.
+			Constant for accessing [member angular_limit/restitution]. The amount of restitution of the rotation when the limit is surpassed.
 		</constant>
 		<constant name="PARAM_ANGULAR_LIMIT_DAMPING" value="15" enum="Param">
-			The amount of damping of the rotation when the limit is surpassed.
+			Constant for accessing [member angular_limit/damping]. The amount of damping of the rotation when the limit is surpassed.
 		</constant>
 		<constant name="PARAM_ANGULAR_MOTION_SOFTNESS" value="16" enum="Param">
-			A factor applied to the all rotation in the limits.
+			Constant for accessing [member angular_motion/softness]. A factor applied to the all rotation in the limits.
 		</constant>
 		<constant name="PARAM_ANGULAR_MOTION_RESTITUTION" value="17" enum="Param">
-			The amount of restitution of the rotation in the limits.
+			Constant for accessing [member angular_motion/restitution]. The amount of restitution of the rotation in the limits.
 		</constant>
 		<constant name="PARAM_ANGULAR_MOTION_DAMPING" value="18" enum="Param">
-			The amount of damping of the rotation in the limits.
+			Constant for accessing [member angular_motion/damping]. The amount of damping of the rotation in the limits.
 		</constant>
 		<constant name="PARAM_ANGULAR_ORTHOGONAL_SOFTNESS" value="19" enum="Param">
-			A factor applied to the all rotation across axes orthogonal to the slider.
+			Constant for accessing [member angular_ortho/softness]. A factor applied to the all rotation across axes orthogonal to the slider.
 		</constant>
 		<constant name="PARAM_ANGULAR_ORTHOGONAL_RESTITUTION" value="20" enum="Param">
-			The amount of restitution of the rotation across axes orthogonal to the slider.
+			Constant for accessing [member angular_ortho/restitution]. The amount of restitution of the rotation across axes orthogonal to the slider.
 		</constant>
 		<constant name="PARAM_ANGULAR_ORTHOGONAL_DAMPING" value="21" enum="Param">
-			The amount of damping of the rotation across axes orthogonal to the slider.
+			Constant for accessing [member angular_ortho/damping]. The amount of damping of the rotation across axes orthogonal to the slider.
 		</constant>
 		<constant name="PARAM_MAX" value="22" enum="Param">
 			Represents the size of the [enum Param] enum.
diff --git a/doc/classes/SoftBody3D.xml b/doc/classes/SoftBody3D.xml
index a4d80a7c3e..195196b78c 100644
--- a/doc/classes/SoftBody3D.xml
+++ b/doc/classes/SoftBody3D.xml
@@ -5,6 +5,7 @@
 	</brief_description>
 	<description>
 		A deformable 3D physics mesh. Used to create elastic or deformable objects such as cloth, rubber, or other flexible materials.
+		Additionally, [SoftBody3D] is subject to wind forces defined in [Area3D] (see [member Area3D.wind_source_path], [member Area3D.wind_force_magnitude], and [member Area3D.wind_attenuation_factor]).
 		[b]Note:[/b] There are many known bugs in [SoftBody3D]. Therefore, it's not recommended to use them for things that can affect gameplay (such as trampolines).
 	</description>
 	<tutorials>
diff --git a/doc/classes/SphereShape3D.xml b/doc/classes/SphereShape3D.xml
index 313b05dff4..4b14c535dd 100644
--- a/doc/classes/SphereShape3D.xml
+++ b/doc/classes/SphereShape3D.xml
@@ -8,7 +8,7 @@
 		[b]Performance:[/b] [SphereShape3D] is fast to check collisions against. It is faster than [BoxShape3D], [CapsuleShape3D], and [CylinderShape3D].
 	</description>
 	<tutorials>
-		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
+		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/2747</link>
 	</tutorials>
 	<members>
 		<member name="radius" type="float" setter="set_radius" getter="get_radius" default="0.5">
diff --git a/doc/classes/SpotLight3D.xml b/doc/classes/SpotLight3D.xml
index ba70edb933..8d2177d3fd 100644
--- a/doc/classes/SpotLight3D.xml
+++ b/doc/classes/SpotLight3D.xml
@@ -11,7 +11,7 @@
 	<tutorials>
 		<link title="3D lights and shadows">$DOCS_URL/tutorials/3d/lights_and_shadows.html</link>
 		<link title="Faking global illumination">$DOCS_URL/tutorials/3d/global_illumination/faking_global_illumination.html</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<members>
 		<member name="shadow_bias" type="float" setter="set_param" getter="get_param" overrides="Light3D" default="0.03" />
diff --git a/doc/classes/Sprite2D.xml b/doc/classes/Sprite2D.xml
index 8dcf286b3a..10ac4b0fcc 100644
--- a/doc/classes/Sprite2D.xml
+++ b/doc/classes/Sprite2D.xml
@@ -7,7 +7,7 @@
 		A node that displays a 2D texture. The texture displayed can be a region from a larger atlas texture, or a frame from a sprite sheet animation.
 	</description>
 	<tutorials>
-		<link title="Instancing Demo">https://godotengine.org/asset-library/asset/148</link>
+		<link title="Instancing Demo">https://godotengine.org/asset-library/asset/2716</link>
 	</tutorials>
 	<methods>
 		<method name="get_rect" qualifiers="const">
diff --git a/doc/classes/StaticBody3D.xml b/doc/classes/StaticBody3D.xml
index 81eda8505b..a2a9a97bc1 100644
--- a/doc/classes/StaticBody3D.xml
+++ b/doc/classes/StaticBody3D.xml
@@ -9,9 +9,9 @@
 		[StaticBody3D] is useful for completely static objects like floors and walls, as well as moving surfaces like conveyor belts and circular revolving platforms (by using [member constant_linear_velocity] and [member constant_angular_velocity]).
 	</description>
 	<tutorials>
-		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/675</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="3D Physics Tests Demo">https://godotengine.org/asset-library/asset/2747</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<members>
 		<member name="constant_angular_velocity" type="Vector3" setter="set_constant_angular_velocity" getter="get_constant_angular_velocity" default="Vector3(0, 0, 0)">
diff --git a/doc/classes/String.xml b/doc/classes/String.xml
index 7592342602..a33a1aea41 100644
--- a/doc/classes/String.xml
+++ b/doc/classes/String.xml
@@ -255,6 +255,13 @@
 				print("User {id} is {name}.".format([["id", 42], ["name", "Godot"]]))
 				[/codeblock]
 				See also the [url=$DOCS_URL/tutorials/scripting/gdscript/gdscript_format_string.html]GDScript format string[/url] tutorial.
+				[b]Note:[/b] The replacement of placeholders is not done all at once, instead each placeholder is replaced in the order they are passed, this means that if one of the replacement strings contains a key it will also be replaced. This can be very powerful, but can also cause unexpected results if you are not careful. If you do not need to perform replacement in the replacement strings, make sure your replacements do not contain placeholders to ensure reliable results.
+				[codeblock]
+				print("{0} {1}".format(["{1}", "x"]))                       # Prints "x x".
+				print("{0} {1}".format(["x", "{0}"]))                       # Prints "x {0}".
+				print("{foo} {bar}".format({"foo": "{bar}", "bar": "baz"})) # Prints "baz baz".
+				print("{foo} {bar}".format({"bar": "baz", "foo": "{bar}"})) # Prints "{bar} baz".
+				[/codeblock]
 				[b]Note:[/b] In C#, it's recommended to [url=https://learn.microsoft.com/en-us/dotnet/csharp/language-reference/tokens/interpolated]interpolate strings with "$"[/url], instead.
 			</description>
 		</method>
diff --git a/doc/classes/StyleBoxFlat.xml b/doc/classes/StyleBoxFlat.xml
index 2c3908a72c..181e1ff77a 100644
--- a/doc/classes/StyleBoxFlat.xml
+++ b/doc/classes/StyleBoxFlat.xml
@@ -7,13 +7,13 @@
 		By configuring various properties of this style box, you can achieve many common looks without the need of a texture. This includes optionally rounded borders, antialiasing, shadows, and skew.
 		Setting corner radius to high values is allowed. As soon as corners overlap, the stylebox will switch to a relative system.
 		[b]Example:[/b]
-		[codeblock]
+		[codeblock lang=text]
 		height = 30
 		corner_radius_top_left = 50
 		corner_radius_bottom_left = 100
 		[/codeblock]
 		The relative system now would take the 1:2 ratio of the two left corners to calculate the actual corner width. Both corners added will [b]never[/b] be more than the height. Result:
-		[codeblock]
+		[codeblock lang=text]
 		corner_radius_top_left: 10
 		corner_radius_bottom_left: 20
 		[/codeblock]
diff --git a/doc/classes/SubViewport.xml b/doc/classes/SubViewport.xml
index 1a7a4f6e96..605cf949c1 100644
--- a/doc/classes/SubViewport.xml
+++ b/doc/classes/SubViewport.xml
@@ -10,12 +10,12 @@
 	<tutorials>
 		<link title="Using Viewports">$DOCS_URL/tutorials/rendering/viewports.html</link>
 		<link title="Viewport and canvas transforms">$DOCS_URL/tutorials/2d/2d_transforms.html</link>
-		<link title="GUI in 3D Demo">https://godotengine.org/asset-library/asset/127</link>
-		<link title="3D in 2D Demo">https://godotengine.org/asset-library/asset/128</link>
-		<link title="2D in 3D Demo">https://godotengine.org/asset-library/asset/129</link>
-		<link title="Screen Capture Demo">https://godotengine.org/asset-library/asset/130</link>
-		<link title="Dynamic Split Screen Demo">https://godotengine.org/asset-library/asset/541</link>
-		<link title="3D Viewport Scaling Demo">https://godotengine.org/asset-library/asset/586</link>
+		<link title="GUI in 3D Viewport Demo">https://godotengine.org/asset-library/asset/2807</link>
+		<link title="3D in 2D Viewport Demo">https://godotengine.org/asset-library/asset/2804</link>
+		<link title="2D in 3D Viewport Demo">https://godotengine.org/asset-library/asset/2803</link>
+		<link title="Screen Capture Demo">https://godotengine.org/asset-library/asset/2808</link>
+		<link title="Dynamic Split Screen Demo">https://godotengine.org/asset-library/asset/2806</link>
+		<link title="3D Resolution Scaling Demo">https://godotengine.org/asset-library/asset/2805</link>
 	</tutorials>
 	<members>
 		<member name="render_target_clear_mode" type="int" setter="set_clear_mode" getter="get_clear_mode" enum="SubViewport.ClearMode" default="0">
diff --git a/doc/classes/SurfaceTool.xml b/doc/classes/SurfaceTool.xml
index 094275c349..576587a5df 100644
--- a/doc/classes/SurfaceTool.xml
+++ b/doc/classes/SurfaceTool.xml
@@ -29,7 +29,7 @@
 	</description>
 	<tutorials>
 		<link title="Using the SurfaceTool">$DOCS_URL/tutorials/3d/procedural_geometry/surfacetool.html</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<methods>
 		<method name="add_index">
diff --git a/doc/classes/TextureButton.xml b/doc/classes/TextureButton.xml
index 6c0d56af05..861981f979 100644
--- a/doc/classes/TextureButton.xml
+++ b/doc/classes/TextureButton.xml
@@ -9,7 +9,7 @@
 		See also [BaseButton] which contains common properties and methods associated with this node.
 	</description>
 	<tutorials>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<members>
 		<member name="flip_h" type="bool" setter="set_flip_h" getter="is_flipped_h" default="false">
diff --git a/doc/classes/TextureRect.xml b/doc/classes/TextureRect.xml
index d18c4a1eee..ab268b6ea5 100644
--- a/doc/classes/TextureRect.xml
+++ b/doc/classes/TextureRect.xml
@@ -7,7 +7,7 @@
 		A control that displays a texture, for example an icon inside a GUI. The texture's placement can be controlled with the [member stretch_mode] property. It can scale, tile, or stay centered inside its bounding rectangle.
 	</description>
 	<tutorials>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<members>
 		<member name="expand_mode" type="int" setter="set_expand_mode" getter="get_expand_mode" enum="TextureRect.ExpandMode" default="0" experimental="Using [constant EXPAND_FIT_WIDTH], [constant EXPAND_FIT_WIDTH_PROPORTIONAL], [constant EXPAND_FIT_HEIGHT], or [constant EXPAND_FIT_HEIGHT_PROPORTIONAL] may result in unstable behavior in some [Container] controls. This behavior may be re-evaluated and changed in the future.">
diff --git a/doc/classes/Thread.xml b/doc/classes/Thread.xml
index dbf63c0852..cf77b93d86 100644
--- a/doc/classes/Thread.xml
+++ b/doc/classes/Thread.xml
@@ -14,7 +14,7 @@
 	<tutorials>
 		<link title="Using multiple threads">$DOCS_URL/tutorials/performance/using_multiple_threads.html</link>
 		<link title="Thread-safe APIs">$DOCS_URL/tutorials/performance/thread_safe_apis.html</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 	<methods>
 		<method name="get_id" qualifiers="const">
diff --git a/doc/classes/TileMap.xml b/doc/classes/TileMap.xml
index bc8a1d7bf1..687c7194cd 100644
--- a/doc/classes/TileMap.xml
+++ b/doc/classes/TileMap.xml
@@ -10,12 +10,12 @@
 	</description>
 	<tutorials>
 		<link title="Using Tilemaps">$DOCS_URL/tutorials/2d/using_tilemaps.html</link>
-		<link title="2D Platformer Demo">https://godotengine.org/asset-library/asset/120</link>
-		<link title="2D Isometric Demo">https://godotengine.org/asset-library/asset/112</link>
-		<link title="2D Hexagonal Demo">https://godotengine.org/asset-library/asset/111</link>
-		<link title="2D Navigation Astar Demo">https://godotengine.org/asset-library/asset/519</link>
-		<link title="2D Role Playing Game Demo">https://godotengine.org/asset-library/asset/520</link>
-		<link title="2D Kinematic Character Demo">https://godotengine.org/asset-library/asset/113</link>
+		<link title="2D Platformer Demo">https://godotengine.org/asset-library/asset/2727</link>
+		<link title="2D Isometric Demo">https://godotengine.org/asset-library/asset/2718</link>
+		<link title="2D Hexagonal Demo">https://godotengine.org/asset-library/asset/2717</link>
+		<link title="2D Grid-based Navigation with AStarGrid2D Demo">https://godotengine.org/asset-library/asset/2723</link>
+		<link title="2D Role Playing Game (RPG) Demo">https://godotengine.org/asset-library/asset/2729</link>
+		<link title="2D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2719</link>
 	</tutorials>
 	<methods>
 		<method name="_tile_data_runtime_update" qualifiers="virtual">
diff --git a/doc/classes/TileMapLayer.xml b/doc/classes/TileMapLayer.xml
index bc8e259599..da716a8fe3 100644
--- a/doc/classes/TileMapLayer.xml
+++ b/doc/classes/TileMapLayer.xml
@@ -268,7 +268,7 @@
 			The quadrant size does not apply on a Y-sorted [TileMapLayer], as tiles are be grouped by Y position instead in that case.
 			[b]Note:[/b] As quadrants are created according to the map's coordinate system, the quadrant's "square shape" might not look like square in the [TileMapLayer]'s local coordinate system.
 		</member>
-		<member name="tile_map_data" type="PackedByteArray" setter="set_tile_map_data_from_array" getter="get_tile_map_data_as_array" default="PackedByteArray(0, 0)">
+		<member name="tile_map_data" type="PackedByteArray" setter="set_tile_map_data_from_array" getter="get_tile_map_data_as_array" default="PackedByteArray(&quot;AAA=&quot;)">
 			The raw tile map data as a byte array.
 		</member>
 		<member name="tile_set" type="TileSet" setter="set_tile_set" getter="get_tile_set">
diff --git a/doc/classes/TileSet.xml b/doc/classes/TileSet.xml
index 37f4cc5c0b..64cd4fb7b2 100644
--- a/doc/classes/TileSet.xml
+++ b/doc/classes/TileSet.xml
@@ -13,12 +13,12 @@
 	</description>
 	<tutorials>
 		<link title="Using Tilemaps">$DOCS_URL/tutorials/2d/using_tilemaps.html</link>
-		<link title="2D Platformer Demo">https://godotengine.org/asset-library/asset/120</link>
-		<link title="2D Isometric Demo">https://godotengine.org/asset-library/asset/112</link>
-		<link title="2D Hexagonal Demo">https://godotengine.org/asset-library/asset/111</link>
-		<link title="2D Navigation Astar Demo">https://godotengine.org/asset-library/asset/519</link>
-		<link title="2D Role Playing Game Demo">https://godotengine.org/asset-library/asset/520</link>
-		<link title="2D Kinematic Character Demo">https://godotengine.org/asset-library/asset/113</link>
+		<link title="2D Platformer Demo">https://godotengine.org/asset-library/asset/2727</link>
+		<link title="2D Isometric Demo">https://godotengine.org/asset-library/asset/2718</link>
+		<link title="2D Hexagonal Demo">https://godotengine.org/asset-library/asset/2717</link>
+		<link title="2D Grid-based Navigation with AStarGrid2D Demo">https://godotengine.org/asset-library/asset/2723</link>
+		<link title="2D Role Playing Game (RPG) Demo">https://godotengine.org/asset-library/asset/2729</link>
+		<link title="2D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2719</link>
 	</tutorials>
 	<methods>
 		<method name="add_custom_data_layer">
diff --git a/doc/classes/Timer.xml b/doc/classes/Timer.xml
index 6eee569443..9de1e09273 100644
--- a/doc/classes/Timer.xml
+++ b/doc/classes/Timer.xml
@@ -15,7 +15,7 @@
 		[b]Note:[/b] Timers are affected by [member Engine.time_scale]. The higher the time scale, the sooner timers will end. How often a timer processes may depend on the framerate or [member Engine.physics_ticks_per_second].
 	</description>
 	<tutorials>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
 	</tutorials>
 	<methods>
 		<method name="is_stopped" qualifiers="const">
diff --git a/doc/classes/Transform2D.xml b/doc/classes/Transform2D.xml
index 4247ff81ee..345d0512ff 100644
--- a/doc/classes/Transform2D.xml
+++ b/doc/classes/Transform2D.xml
@@ -10,8 +10,8 @@
 	<tutorials>
 		<link title="Math documentation index">$DOCS_URL/tutorials/math/index.html</link>
 		<link title="Matrices and transforms">$DOCS_URL/tutorials/math/matrices_and_transforms.html</link>
-		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/584</link>
-		<link title="2.5D Demo">https://godotengine.org/asset-library/asset/583</link>
+		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/2787</link>
+		<link title="2.5D Game Demo">https://godotengine.org/asset-library/asset/2783</link>
 	</tutorials>
 	<constructors>
 		<constructor name="Transform2D">
diff --git a/doc/classes/Transform3D.xml b/doc/classes/Transform3D.xml
index 1827cdf8f0..2a0bbc46af 100644
--- a/doc/classes/Transform3D.xml
+++ b/doc/classes/Transform3D.xml
@@ -12,9 +12,9 @@
 		<link title="Math documentation index">$DOCS_URL/tutorials/math/index.html</link>
 		<link title="Matrices and transforms">$DOCS_URL/tutorials/math/matrices_and_transforms.html</link>
 		<link title="Using 3D transforms">$DOCS_URL/tutorials/3d/using_transforms.html</link>
-		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/584</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="2.5D Demo">https://godotengine.org/asset-library/asset/583</link>
+		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/2787</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="2.5D Game Demo">https://godotengine.org/asset-library/asset/2783</link>
 	</tutorials>
 	<constructors>
 		<constructor name="Transform3D">
diff --git a/doc/classes/VBoxContainer.xml b/doc/classes/VBoxContainer.xml
index d3ea94c0eb..62484ecdc0 100644
--- a/doc/classes/VBoxContainer.xml
+++ b/doc/classes/VBoxContainer.xml
@@ -8,6 +8,6 @@
 	</description>
 	<tutorials>
 		<link title="Using Containers">$DOCS_URL/tutorials/ui/gui_containers.html</link>
-		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/676</link>
+		<link title="3D Voxel Demo">https://godotengine.org/asset-library/asset/2755</link>
 	</tutorials>
 </class>
diff --git a/doc/classes/Vector2.xml b/doc/classes/Vector2.xml
index f33076a92f..7b166a4fb0 100644
--- a/doc/classes/Vector2.xml
+++ b/doc/classes/Vector2.xml
@@ -14,7 +14,7 @@
 		<link title="Vector math">$DOCS_URL/tutorials/math/vector_math.html</link>
 		<link title="Advanced vector math">$DOCS_URL/tutorials/math/vectors_advanced.html</link>
 		<link title="3Blue1Brown Essence of Linear Algebra">https://www.youtube.com/playlist?list=PLZHQObOWTQDPD3MizzM2xVFitgF8hE_ab</link>
-		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/584</link>
+		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/2787</link>
 		<link title="All 2D Demos">https://github.com/godotengine/godot-demo-projects/tree/master/2d</link>
 	</tutorials>
 	<constructors>
@@ -110,7 +110,8 @@
 			<return type="Vector2" />
 			<param index="0" name="n" type="Vector2" />
 			<description>
-				Returns a new vector "bounced off" from a plane defined by the given normal.
+				Returns the vector "bounced off" from a line defined by the given normal [param n] perpendicular to the line.
+				[b]Note:[/b] [method bounce] performs the operation that most engines and frameworks call [code skip-lint]reflect()[/code].
 			</description>
 		</method>
 		<method name="ceil" qualifiers="const">
@@ -132,7 +133,7 @@
 			<param index="0" name="with" type="Vector2" />
 			<description>
 				Returns the 2D analog of the cross product for this vector and [param with].
-				This is the signed area of the parallelogram formed by the two vectors. If the second vector is clockwise from the first vector, then the cross product is the positive area. If counter-clockwise, the cross product is the negative area.
+				This is the signed area of the parallelogram formed by the two vectors. If the second vector is clockwise from the first vector, then the cross product is the positive area. If counter-clockwise, the cross product is the negative area. If the two vectors are parallel this returns zero, making it useful for testing if two vectors are parallel.
 				[b]Note:[/b] Cross product is not defined in 2D mathematically. This method embeds the 2D vectors in the XY plane of 3D space and uses their cross product's Z component as the analog.
 			</description>
 		</method>
@@ -321,9 +322,10 @@
 		</method>
 		<method name="reflect" qualifiers="const">
 			<return type="Vector2" />
-			<param index="0" name="n" type="Vector2" />
+			<param index="0" name="line" type="Vector2" />
 			<description>
-				Returns the result of reflecting the vector from a line defined by the given direction vector [param n].
+				Returns the result of reflecting the vector from a line defined by the given direction vector [param line].
+				[b]Note:[/b] [method reflect] differs from what other engines and frameworks call [code skip-lint]reflect()[/code]. In other engines, [code skip-lint]reflect()[/code] takes a normal direction which is a direction perpendicular to the line. In Godot, you specify the direction of the line directly. See also [method bounce] which does what most engines call [code skip-lint]reflect()[/code].
 			</description>
 		</method>
 		<method name="rotated" qualifiers="const">
diff --git a/doc/classes/Vector3.xml b/doc/classes/Vector3.xml
index 872534fd89..031d91af78 100644
--- a/doc/classes/Vector3.xml
+++ b/doc/classes/Vector3.xml
@@ -14,7 +14,7 @@
 		<link title="Vector math">$DOCS_URL/tutorials/math/vector_math.html</link>
 		<link title="Advanced vector math">$DOCS_URL/tutorials/math/vectors_advanced.html</link>
 		<link title="3Blue1Brown Essence of Linear Algebra">https://www.youtube.com/playlist?list=PLZHQObOWTQDPD3MizzM2xVFitgF8hE_ab</link>
-		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/584</link>
+		<link title="Matrix Transform Demo">https://godotengine.org/asset-library/asset/2787</link>
 		<link title="All 3D Demos">https://github.com/godotengine/godot-demo-projects/tree/master/3d</link>
 	</tutorials>
 	<constructors>
@@ -86,7 +86,8 @@
 			<return type="Vector3" />
 			<param index="0" name="n" type="Vector3" />
 			<description>
-				Returns the vector "bounced off" from a plane defined by the given normal.
+				Returns the vector "bounced off" from a plane defined by the given normal [param n].
+				[b]Note:[/b] [method bounce] performs the operation that most engines and frameworks call [code skip-lint]reflect()[/code].
 			</description>
 		</method>
 		<method name="ceil" qualifiers="const">
@@ -108,6 +109,7 @@
 			<param index="0" name="with" type="Vector3" />
 			<description>
 				Returns the cross product of this vector and [param with].
+				This returns a vector perpendicular to both this and [param with], which would be the normal vector of the plane defined by the two vectors. As there are two such vectors, in opposite directions, this method returns the vector defined by a right-handed coordinate system. If the two vectors are parallel this returns an empty vector, making it useful for testing if two vectors are parallel.
 			</description>
 		</method>
 		<method name="cubic_interpolate" qualifiers="const">
@@ -305,9 +307,10 @@
 		</method>
 		<method name="reflect" qualifiers="const">
 			<return type="Vector3" />
-			<param index="0" name="n" type="Vector3" />
+			<param index="0" name="direction" type="Vector3" />
 			<description>
-				Returns the result of reflecting the vector from a plane defined by the given normal [param n].
+				Returns the result of reflecting the vector from a plane defined by the given direction vector [param direction].
+				[b]Note:[/b] [method reflect] differs from what other engines and frameworks call [code skip-lint]reflect()[/code]. In other engines, [code skip-lint]reflect()[/code] takes a normal direction which is a direction perpendicular to the plane. In Godot, you specify a direction parallel to the plane. See also [method bounce] which does what most engines call [code skip-lint]reflect()[/code].
 			</description>
 		</method>
 		<method name="rotated" qualifiers="const">
diff --git a/doc/classes/VehicleBody3D.xml b/doc/classes/VehicleBody3D.xml
index 359e84c3da..5b79067659 100644
--- a/doc/classes/VehicleBody3D.xml
+++ b/doc/classes/VehicleBody3D.xml
@@ -9,7 +9,7 @@
 		[b]Note:[/b] This class has known issues and isn't designed to provide realistic 3D vehicle physics. If you want advanced vehicle physics, you may have to write your own physics integration using [CharacterBody3D] or [RigidBody3D].
 	</description>
 	<tutorials>
-		<link title="3D Truck Town Demo">https://godotengine.org/asset-library/asset/524</link>
+		<link title="3D Truck Town Demo">https://godotengine.org/asset-library/asset/2752</link>
 	</tutorials>
 	<members>
 		<member name="brake" type="float" setter="set_brake" getter="get_brake" default="0.0">
diff --git a/doc/classes/VehicleWheel3D.xml b/doc/classes/VehicleWheel3D.xml
index 77cb5ca9f8..92b2297bf4 100644
--- a/doc/classes/VehicleWheel3D.xml
+++ b/doc/classes/VehicleWheel3D.xml
@@ -8,7 +8,7 @@
 		[b]Note:[/b] This class has known issues and isn't designed to provide realistic 3D vehicle physics. If you want advanced vehicle physics, you may need to write your own physics integration using another [PhysicsBody3D] class.
 	</description>
 	<tutorials>
-		<link title="3D Truck Town Demo">https://godotengine.org/asset-library/asset/524</link>
+		<link title="3D Truck Town Demo">https://godotengine.org/asset-library/asset/2752</link>
 	</tutorials>
 	<methods>
 		<method name="get_contact_body" qualifiers="const">
diff --git a/doc/classes/Viewport.xml b/doc/classes/Viewport.xml
index dcc817427b..cf33eac9c7 100644
--- a/doc/classes/Viewport.xml
+++ b/doc/classes/Viewport.xml
@@ -13,12 +13,12 @@
 	<tutorials>
 		<link title="Using Viewports">$DOCS_URL/tutorials/rendering/viewports.html</link>
 		<link title="Viewport and canvas transforms">$DOCS_URL/tutorials/2d/2d_transforms.html</link>
-		<link title="GUI in 3D Demo">https://godotengine.org/asset-library/asset/127</link>
-		<link title="3D in 2D Demo">https://godotengine.org/asset-library/asset/128</link>
-		<link title="2D in 3D Demo">https://godotengine.org/asset-library/asset/129</link>
-		<link title="Screen Capture Demo">https://godotengine.org/asset-library/asset/130</link>
-		<link title="Dynamic Split Screen Demo">https://godotengine.org/asset-library/asset/541</link>
-		<link title="3D Viewport Scaling Demo">https://godotengine.org/asset-library/asset/586</link>
+		<link title="GUI in 3D Viewport Demo">https://godotengine.org/asset-library/asset/2807</link>
+		<link title="3D in 2D Viewport Demo">https://godotengine.org/asset-library/asset/2804</link>
+		<link title="2D in 3D Viewport Demo">https://godotengine.org/asset-library/asset/2803</link>
+		<link title="Screen Capture Demo">https://godotengine.org/asset-library/asset/2808</link>
+		<link title="Dynamic Split Screen Demo">https://godotengine.org/asset-library/asset/2806</link>
+		<link title="3D Resolution Scaling Demo">https://godotengine.org/asset-library/asset/2805</link>
 	</tutorials>
 	<methods>
 		<method name="find_world_2d" qualifiers="const">
@@ -347,12 +347,17 @@
 			Sets the screen-space antialiasing method used. Screen-space antialiasing works by selectively blurring edges in a post-process shader. It differs from MSAA which takes multiple coverage samples while rendering objects. Screen-space AA methods are typically faster than MSAA and will smooth out specular aliasing, but tend to make scenes appear blurry.
 		</member>
 		<member name="sdf_oversize" type="int" setter="set_sdf_oversize" getter="get_sdf_oversize" enum="Viewport.SDFOversize" default="1">
+			Controls how much of the original viewport's size should be covered by the 2D signed distance field. This SDF can be sampled in [CanvasItem] shaders and is also used for [GPUParticles2D] collision. Higher values allow portions of occluders located outside the viewport to still be taken into account in the generated signed distance field, at the cost of performance. If you notice particles falling through [LightOccluder2D]s as the occluders leave the viewport, increase this setting.
+			The percentage is added on each axis and on both sides. For example, with the default [constant SDF_OVERSIZE_120_PERCENT], the signed distance field will cover 20% of the viewport's size outside the viewport on each side (top, right, bottom, left).
 		</member>
 		<member name="sdf_scale" type="int" setter="set_sdf_scale" getter="get_sdf_scale" enum="Viewport.SDFScale" default="1">
+			The resolution scale to use for the 2D signed distance field. Higher values lead to a more precise and more stable signed distance field as the camera moves, at the cost of performance.
 		</member>
 		<member name="snap_2d_transforms_to_pixel" type="bool" setter="set_snap_2d_transforms_to_pixel" getter="is_snap_2d_transforms_to_pixel_enabled" default="false">
+			If [code]true[/code], [CanvasItem] nodes will internally snap to full pixels. Their position can still be sub-pixel, but the decimals will not have effect. This can lead to a crisper appearance at the cost of less smooth movement, especially when [Camera2D] smoothing is enabled.
 		</member>
 		<member name="snap_2d_vertices_to_pixel" type="bool" setter="set_snap_2d_vertices_to_pixel" getter="is_snap_2d_vertices_to_pixel_enabled" default="false">
+			If [code]true[/code], vertices of [CanvasItem] nodes will snap to full pixels. Only affects the final vertex positions, not the transforms. This can lead to a crisper appearance at the cost of less smooth movement, especially when [Camera2D] smoothing is enabled.
 		</member>
 		<member name="texture_mipmap_bias" type="float" setter="set_texture_mipmap_bias" getter="get_texture_mipmap_bias" default="0.0">
 			Affects the final texture sharpness by reading from a lower or higher mipmap (also called "texture LOD bias"). Negative values make mipmapped textures sharper but grainier when viewed at a distance, while positive values make mipmapped textures blurrier (even when up close).
@@ -389,7 +394,7 @@
 		<member name="vrs_texture" type="Texture2D" setter="set_vrs_texture" getter="get_vrs_texture">
 			Texture to use when [member vrs_mode] is set to [constant Viewport.VRS_TEXTURE].
 			The texture [i]must[/i] use a lossless compression format so that colors can be matched precisely. The following VRS densities are mapped to various colors, with brighter colors representing a lower level of shading precision:
-			[codeblock]
+			[codeblock lang=text]
 			- 1×1 = rgb(0, 0, 0)     - #000000
 			- 1×2 = rgb(0, 85, 0)    - #005500
 			- 2×1 = rgb(85, 0, 0)    - #550000
@@ -515,14 +520,16 @@
 			Objects are displayed without light information.
 		</constant>
 		<constant name="DEBUG_DRAW_LIGHTING" value="2" enum="DebugDraw">
+			Objects are displayed without textures and only with lighting information.
 		</constant>
 		<constant name="DEBUG_DRAW_OVERDRAW" value="3" enum="DebugDraw">
 			Objects are displayed semi-transparent with additive blending so you can see where they are drawing over top of one another. A higher overdraw means you are wasting performance on drawing pixels that are being hidden behind others.
 		</constant>
 		<constant name="DEBUG_DRAW_WIREFRAME" value="4" enum="DebugDraw">
-			Objects are displayed in wireframe style.
+			Objects are displayed as wireframe models.
 		</constant>
 		<constant name="DEBUG_DRAW_NORMAL_BUFFER" value="5" enum="DebugDraw">
+			Objects are displayed without lighting information and their textures replaced by normal mapping.
 		</constant>
 		<constant name="DEBUG_DRAW_VOXEL_GI_ALBEDO" value="6" enum="DebugDraw">
 			Objects are displayed with only the albedo value from [VoxelGI]s.
@@ -540,6 +547,7 @@
 			Draws the shadow atlas that stores shadows from [DirectionalLight3D]s in the upper left quadrant of the [Viewport].
 		</constant>
 		<constant name="DEBUG_DRAW_SCENE_LUMINANCE" value="11" enum="DebugDraw">
+			Draws the scene luminance buffer (if available) in the upper left quadrant of the [Viewport].
 		</constant>
 		<constant name="DEBUG_DRAW_SSAO" value="12" enum="DebugDraw">
 			Draws the screen-space ambient occlusion texture instead of the scene so that you can clearly see how it is affecting objects. In order for this display mode to work, you must have [member Environment.ssao_enabled] set in your [WorldEnvironment].
@@ -554,24 +562,36 @@
 			Draws the decal atlas used by [Decal]s and light projector textures in the upper left quadrant of the [Viewport].
 		</constant>
 		<constant name="DEBUG_DRAW_SDFGI" value="16" enum="DebugDraw">
+			Draws the cascades used to render signed distance field global illumination (SDFGI).
+			Does nothing if the current environment's [member Environment.sdfgi_enabled] is [code]false[/code] or SDFGI is not supported on the platform.
 		</constant>
 		<constant name="DEBUG_DRAW_SDFGI_PROBES" value="17" enum="DebugDraw">
+			Draws the probes used for signed distance field global illumination (SDFGI).
+			Does nothing if the current environment's [member Environment.sdfgi_enabled] is [code]false[/code] or SDFGI is not supported on the platform.
 		</constant>
 		<constant name="DEBUG_DRAW_GI_BUFFER" value="18" enum="DebugDraw">
+			Draws the buffer used for global illumination (GI).
 		</constant>
 		<constant name="DEBUG_DRAW_DISABLE_LOD" value="19" enum="DebugDraw">
+			Draws all of the objects at their highest polycount, without low level of detail (LOD).
 		</constant>
 		<constant name="DEBUG_DRAW_CLUSTER_OMNI_LIGHTS" value="20" enum="DebugDraw">
+			Draws the cluster used by [OmniLight3D] nodes to optimize light rendering.
 		</constant>
 		<constant name="DEBUG_DRAW_CLUSTER_SPOT_LIGHTS" value="21" enum="DebugDraw">
+			Draws the cluster used by [SpotLight3D] nodes to optimize light rendering.
 		</constant>
 		<constant name="DEBUG_DRAW_CLUSTER_DECALS" value="22" enum="DebugDraw">
+			Draws the cluster used by [Decal] nodes to optimize decal rendering.
 		</constant>
 		<constant name="DEBUG_DRAW_CLUSTER_REFLECTION_PROBES" value="23" enum="DebugDraw">
+			Draws the cluster used by [ReflectionProbe] nodes to optimize decal rendering.
 		</constant>
 		<constant name="DEBUG_DRAW_OCCLUDERS" value="24" enum="DebugDraw">
+			Draws the buffer used for occlusion culling.
 		</constant>
 		<constant name="DEBUG_DRAW_MOTION_VECTORS" value="25" enum="DebugDraw">
+			Draws vector lines over the viewport to indicate the movement of pixels between frames.
 		</constant>
 		<constant name="DEBUG_DRAW_INTERNAL_BUFFER" value="26" enum="DebugDraw">
 			Draws the internal resolution buffer of the scene before post-processing is applied.
@@ -591,7 +611,7 @@
 			Use this for non-pixel art textures that may be viewed at a low scale (e.g. due to [Camera2D] zoom or sprite scaling), as mipmaps are important to smooth out pixels that are smaller than on-screen pixels.
 		</constant>
 		<constant name="DEFAULT_CANVAS_ITEM_TEXTURE_FILTER_MAX" value="4" enum="DefaultCanvasItemTextureFilter">
-			Max value for [enum DefaultCanvasItemTextureFilter] enum.
+			Represents the size of the [enum DefaultCanvasItemTextureFilter] enum.
 		</constant>
 		<constant name="DEFAULT_CANVAS_ITEM_TEXTURE_REPEAT_DISABLED" value="0" enum="DefaultCanvasItemTextureRepeat">
 			Disables textures repeating. Instead, when reading UVs outside the 0-1 range, the value will be clamped to the edge of the texture, resulting in a stretched out look at the borders of the texture.
@@ -603,34 +623,43 @@
 			Flip the texture when repeating so that the edge lines up instead of abruptly changing.
 		</constant>
 		<constant name="DEFAULT_CANVAS_ITEM_TEXTURE_REPEAT_MAX" value="3" enum="DefaultCanvasItemTextureRepeat">
-			Max value for [enum DefaultCanvasItemTextureRepeat] enum.
+			Represents the size of the [enum DefaultCanvasItemTextureRepeat] enum.
 		</constant>
 		<constant name="SDF_OVERSIZE_100_PERCENT" value="0" enum="SDFOversize">
+			The signed distance field only covers the viewport's own rectangle.
 		</constant>
 		<constant name="SDF_OVERSIZE_120_PERCENT" value="1" enum="SDFOversize">
+			The signed distance field is expanded to cover 20% of the viewport's size around the borders.
 		</constant>
 		<constant name="SDF_OVERSIZE_150_PERCENT" value="2" enum="SDFOversize">
+			The signed distance field is expanded to cover 50% of the viewport's size around the borders.
 		</constant>
 		<constant name="SDF_OVERSIZE_200_PERCENT" value="3" enum="SDFOversize">
+			The signed distance field is expanded to cover 100% (double) of the viewport's size around the borders.
 		</constant>
 		<constant name="SDF_OVERSIZE_MAX" value="4" enum="SDFOversize">
+			Represents the size of the [enum SDFOversize] enum.
 		</constant>
 		<constant name="SDF_SCALE_100_PERCENT" value="0" enum="SDFScale">
+			The signed distance field is rendered at full resolution.
 		</constant>
 		<constant name="SDF_SCALE_50_PERCENT" value="1" enum="SDFScale">
+			The signed distance field is rendered at half the resolution of this viewport.
 		</constant>
 		<constant name="SDF_SCALE_25_PERCENT" value="2" enum="SDFScale">
+			The signed distance field is rendered at a quarter the resolution of this viewport.
 		</constant>
 		<constant name="SDF_SCALE_MAX" value="3" enum="SDFScale">
+			Represents the size of the [enum SDFScale] enum.
 		</constant>
 		<constant name="VRS_DISABLED" value="0" enum="VRSMode">
-			VRS is disabled.
+			Variable Rate Shading is disabled.
 		</constant>
 		<constant name="VRS_TEXTURE" value="1" enum="VRSMode">
-			VRS uses a texture. Note, for stereoscopic use a texture atlas with a texture for each view.
+			Variable Rate Shading uses a texture. Note, for stereoscopic use a texture atlas with a texture for each view.
 		</constant>
 		<constant name="VRS_XR" value="2" enum="VRSMode">
-			VRS texture is supplied by the primary [XRInterface].
+			Variable Rate Shading's texture is supplied by the primary [XRInterface].
 		</constant>
 		<constant name="VRS_MAX" value="3" enum="VRSMode">
 			Represents the size of the [enum VRSMode] enum.
diff --git a/doc/classes/ViewportTexture.xml b/doc/classes/ViewportTexture.xml
index e12933d64b..ba2352ab61 100644
--- a/doc/classes/ViewportTexture.xml
+++ b/doc/classes/ViewportTexture.xml
@@ -4,20 +4,21 @@
 		Provides the content of a [Viewport] as a dynamic texture.
 	</brief_description>
 	<description>
-		Provides the content of a [Viewport] as a dynamic [Texture2D]. This can be used to mix controls, 2D game objects, and 3D game objects in the same scene.
-		To create a [ViewportTexture] in code, use the [method Viewport.get_texture] method on the target viewport.
+		A [ViewportTexture] provides the content of a [Viewport] as a dynamic [Texture2D]. This can be used to combine the rendering of [Control], [Node2D] and [Node3D] nodes. For example, you can use this texture to display a 3D scene inside a [TextureRect], or a 2D overlay in a [Sprite3D].
+		To get a [ViewportTexture] in code, use the [method Viewport.get_texture] method on the target viewport.
 		[b]Note:[/b] A [ViewportTexture] is always local to its scene (see [member Resource.resource_local_to_scene]). If the scene root is not ready, it may return incorrect data (see [signal Node.ready]).
+		[b]Note:[/b] Instantiating scenes containing a high-resolution [ViewportTexture] may cause noticeable stutter.
 	</description>
 	<tutorials>
-		<link title="GUI in 3D Demo">https://godotengine.org/asset-library/asset/127</link>
-		<link title="3D in 2D Demo">https://godotengine.org/asset-library/asset/128</link>
-		<link title="2D in 3D Demo">https://godotengine.org/asset-library/asset/129</link>
-		<link title="3D Viewport Scaling Demo">https://godotengine.org/asset-library/asset/586</link>
+		<link title="GUI in 3D Viewport Demo">https://godotengine.org/asset-library/asset/2807</link>
+		<link title="3D in 2D Viewport Demo">https://godotengine.org/asset-library/asset/2804</link>
+		<link title="2D in 3D Viewport Demo">https://godotengine.org/asset-library/asset/2803</link>
+		<link title="3D Resolution Scaling Demo">https://godotengine.org/asset-library/asset/2805</link>
 	</tutorials>
 	<members>
 		<member name="viewport_path" type="NodePath" setter="set_viewport_path_in_scene" getter="get_viewport_path_in_scene" default="NodePath(&quot;&quot;)">
-			The path to the [Viewport] node to display. This is relative to the scene root, not to the node that uses the texture.
-			[b]Note:[/b] In the editor, this path is automatically updated when the target viewport or one of its ancestors is renamed or moved. At runtime, the path may not be able to automatically update due to the inability to determine the scene root.
+			The path to the [Viewport] node to display. This is relative to the local scene root (see [method Resource.get_local_scene]), [b]not[/b] to the nodes that use this texture.
+			[b]Note:[/b] In the editor, this path is automatically updated when the target viewport or one of its ancestors is renamed or moved. At runtime, this path may not automatically update if the scene root cannot be found.
 		</member>
 	</members>
 </class>
diff --git a/doc/classes/VisibleOnScreenNotifier2D.xml b/doc/classes/VisibleOnScreenNotifier2D.xml
index 9d22bf6cff..cf4a0c0018 100644
--- a/doc/classes/VisibleOnScreenNotifier2D.xml
+++ b/doc/classes/VisibleOnScreenNotifier2D.xml
@@ -9,7 +9,7 @@
 		[b]Note:[/b] [VisibleOnScreenNotifier2D] uses the render culling code to determine whether it's visible on screen, so it won't function unless [member CanvasItem.visible] is set to [code]true[/code].
 	</description>
 	<tutorials>
-		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/515</link>
+		<link title="2D Dodge The Creeps Demo">https://godotengine.org/asset-library/asset/2712</link>
 	</tutorials>
 	<methods>
 		<method name="is_on_screen" qualifiers="const">
diff --git a/doc/classes/VoxelGI.xml b/doc/classes/VoxelGI.xml
index 9e4026a750..93679cccf6 100644
--- a/doc/classes/VoxelGI.xml
+++ b/doc/classes/VoxelGI.xml
@@ -12,7 +12,7 @@
 	</description>
 	<tutorials>
 		<link title="Using Voxel global illumination">$DOCS_URL/tutorials/3d/global_illumination/using_voxel_gi.html</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="bake">
diff --git a/doc/classes/VoxelGIData.xml b/doc/classes/VoxelGIData.xml
index 088854e8f4..43bf04b1f5 100644
--- a/doc/classes/VoxelGIData.xml
+++ b/doc/classes/VoxelGIData.xml
@@ -8,7 +8,7 @@
 		[b]Note:[/b] To prevent text-based scene files ([code].tscn[/code]) from growing too much and becoming slow to load and save, always save [VoxelGIData] to an external binary resource file ([code].res[/code]) instead of embedding it within the scene. This can be done by clicking the dropdown arrow next to the [VoxelGIData] resource, choosing [b]Edit[/b], clicking the floppy disk icon at the top of the Inspector then choosing [b]Save As...[/b].
 	</description>
 	<tutorials>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<methods>
 		<method name="allocate">
diff --git a/doc/classes/WorldEnvironment.xml b/doc/classes/WorldEnvironment.xml
index c7ee160174..80dc46ae93 100644
--- a/doc/classes/WorldEnvironment.xml
+++ b/doc/classes/WorldEnvironment.xml
@@ -10,9 +10,8 @@
 	</description>
 	<tutorials>
 		<link title="Environment and post-processing">$DOCS_URL/tutorials/3d/environment_and_post_processing.html</link>
-		<link title="3D Material Testers Demo">https://godotengine.org/asset-library/asset/123</link>
-		<link title="2D HDR Demo">https://godotengine.org/asset-library/asset/110</link>
-		<link title="Third Person Shooter Demo">https://godotengine.org/asset-library/asset/678</link>
+		<link title="3D Material Testers Demo">https://godotengine.org/asset-library/asset/2742</link>
+		<link title="Third Person Shooter (TPS) Demo">https://godotengine.org/asset-library/asset/2710</link>
 	</tutorials>
 	<members>
 		<member name="camera_attributes" type="CameraAttributes" setter="set_camera_attributes" getter="get_camera_attributes">
diff --git a/doc/tools/make_rst.py b/doc/tools/make_rst.py
index 0d2bc87f55..d7230db6a6 100755
--- a/doc/tools/make_rst.py
+++ b/doc/tools/make_rst.py
@@ -87,6 +87,7 @@ BASE_STRINGS = [
     "This method may be changed or removed in future versions.",
     "This operator may be changed or removed in future versions.",
     "This theme property may be changed or removed in future versions.",
+    "[b]Note:[/b] The returned array is [i]copied[/i] and any changes to it will not update the original property value. See [%s] for more details.",
 ]
 strings_l10n: Dict[str, str] = {}
 
@@ -145,6 +146,18 @@ CLASSES_WITH_CSHARP_DIFFERENCES: List[str] = [
     "Variant",
 ]
 
+PACKED_ARRAY_TYPES: List[str] = [
+    "PackedByteArray",
+    "PackedColorArray",
+    "PackedFloat32Array",
+    "Packedfloat64Array",
+    "PackedInt32Array",
+    "PackedInt64Array",
+    "PackedStringArray",
+    "PackedVector2Array",
+    "PackedVector3Array",
+]
+
 
 class State:
     def __init__(self) -> None:
@@ -1277,6 +1290,9 @@ def make_rst_class(class_def: ClassDef, state: State, dry_run: bool, output_dir:
 
                 if property_def.text is not None and property_def.text.strip() != "":
                     f.write(f"{format_text_block(property_def.text.strip(), property_def, state)}\n\n")
+                    if property_def.type_name.type_name in PACKED_ARRAY_TYPES:
+                        tmp = f"[b]Note:[/b] The returned array is [i]copied[/i] and any changes to it will not update the original property value. See [{property_def.type_name.type_name}] for more details."
+                        f.write(f"{format_text_block(tmp, property_def, state)}\n\n")
                 elif property_def.deprecated is None and property_def.experimental is None:
                     f.write(".. container:: contribute\n\n\t")
                     f.write(
diff --git a/drivers/gles3/effects/cubemap_filter.cpp b/drivers/gles3/effects/cubemap_filter.cpp
new file mode 100644
index 0000000000..b88e655492
--- /dev/null
+++ b/drivers/gles3/effects/cubemap_filter.cpp
@@ -0,0 +1,209 @@
+/**************************************************************************/
+/*  cubemap_filter.cpp                                                    */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifdef GLES3_ENABLED
+
+#include "cubemap_filter.h"
+
+#include "../storage/texture_storage.h"
+#include "core/config/project_settings.h"
+
+using namespace GLES3;
+
+CubemapFilter *CubemapFilter::singleton = nullptr;
+
+CubemapFilter::CubemapFilter() {
+	singleton = this;
+	ggx_samples = GLOBAL_GET("rendering/reflections/sky_reflections/ggx_samples");
+
+	{
+		String defines;
+		defines += "\n#define MAX_SAMPLE_COUNT " + itos(ggx_samples) + "\n";
+		cubemap_filter.shader.initialize(defines);
+		cubemap_filter.shader_version = cubemap_filter.shader.version_create();
+	}
+
+	{ // Screen Triangle.
+		glGenBuffers(1, &screen_triangle);
+		glBindBuffer(GL_ARRAY_BUFFER, screen_triangle);
+
+		const float qv[6] = {
+			-1.0f,
+			-1.0f,
+			3.0f,
+			-1.0f,
+			-1.0f,
+			3.0f,
+		};
+
+		glBufferData(GL_ARRAY_BUFFER, sizeof(float) * 6, qv, GL_STATIC_DRAW);
+		glBindBuffer(GL_ARRAY_BUFFER, 0); //unbind
+
+		glGenVertexArrays(1, &screen_triangle_array);
+		glBindVertexArray(screen_triangle_array);
+		glBindBuffer(GL_ARRAY_BUFFER, screen_triangle);
+		glVertexAttribPointer(RS::ARRAY_VERTEX, 2, GL_FLOAT, GL_FALSE, sizeof(float) * 2, nullptr);
+		glEnableVertexAttribArray(RS::ARRAY_VERTEX);
+		glBindVertexArray(0);
+		glBindBuffer(GL_ARRAY_BUFFER, 0); //unbind
+	}
+}
+
+CubemapFilter::~CubemapFilter() {
+	glDeleteBuffers(1, &screen_triangle);
+	glDeleteVertexArrays(1, &screen_triangle_array);
+
+	cubemap_filter.shader.version_free(cubemap_filter.shader_version);
+	singleton = nullptr;
+}
+
+// Helper functions for IBL filtering
+
+Vector3 importance_sample_GGX(Vector2 xi, float roughness4) {
+	// Compute distribution direction
+	float phi = 2.0 * Math_PI * xi.x;
+	float cos_theta = sqrt((1.0 - xi.y) / (1.0 + (roughness4 - 1.0) * xi.y));
+	float sin_theta = sqrt(1.0 - cos_theta * cos_theta);
+
+	// Convert to spherical direction
+	Vector3 half_vector;
+	half_vector.x = sin_theta * cos(phi);
+	half_vector.y = sin_theta * sin(phi);
+	half_vector.z = cos_theta;
+
+	return half_vector;
+}
+
+float distribution_GGX(float NdotH, float roughness4) {
+	float NdotH2 = NdotH * NdotH;
+	float denom = (NdotH2 * (roughness4 - 1.0) + 1.0);
+	denom = Math_PI * denom * denom;
+
+	return roughness4 / denom;
+}
+
+float radical_inverse_vdC(uint32_t bits) {
+	bits = (bits << 16) | (bits >> 16);
+	bits = ((bits & 0x55555555) << 1) | ((bits & 0xAAAAAAAA) >> 1);
+	bits = ((bits & 0x33333333) << 2) | ((bits & 0xCCCCCCCC) >> 2);
+	bits = ((bits & 0x0F0F0F0F) << 4) | ((bits & 0xF0F0F0F0) >> 4);
+	bits = ((bits & 0x00FF00FF) << 8) | ((bits & 0xFF00FF00) >> 8);
+
+	return float(bits) * 2.3283064365386963e-10;
+}
+
+Vector2 hammersley(uint32_t i, uint32_t N) {
+	return Vector2(float(i) / float(N), radical_inverse_vdC(i));
+}
+
+void CubemapFilter::filter_radiance(GLuint p_source_cubemap, GLuint p_dest_cubemap, GLuint p_dest_framebuffer, int p_source_size, int p_mipmap_count, int p_layer) {
+	glActiveTexture(GL_TEXTURE0);
+	glBindTexture(GL_TEXTURE_CUBE_MAP, p_source_cubemap);
+	glBindFramebuffer(GL_FRAMEBUFFER, p_dest_framebuffer);
+
+	CubemapFilterShaderGLES3::ShaderVariant mode = CubemapFilterShaderGLES3::MODE_DEFAULT;
+
+	if (p_layer == 0) {
+		glGenerateMipmap(GL_TEXTURE_CUBE_MAP);
+		// Copy over base layer without filtering.
+		mode = CubemapFilterShaderGLES3::MODE_COPY;
+	}
+
+	int size = p_source_size >> p_layer;
+	glViewport(0, 0, size, size);
+	glBindVertexArray(screen_triangle_array);
+
+	bool success = cubemap_filter.shader.version_bind_shader(cubemap_filter.shader_version, mode);
+	if (!success) {
+		return;
+	}
+
+	if (p_layer > 0) {
+		const uint32_t sample_counts[4] = { 1, ggx_samples / 4, ggx_samples / 2, ggx_samples };
+		uint32_t sample_count = sample_counts[MIN(3, p_layer)];
+
+		float roughness = float(p_layer) / (p_mipmap_count);
+		float roughness4 = roughness * roughness;
+		roughness4 *= roughness4;
+
+		float solid_angle_texel = 4.0 * Math_PI / float(6 * size * size);
+
+		LocalVector<float> sample_directions;
+		sample_directions.resize(4 * sample_count);
+
+		uint32_t index = 0;
+		float weight = 0.0;
+		for (uint32_t i = 0; i < sample_count; i++) {
+			Vector2 xi = hammersley(i, sample_count);
+			Vector3 dir = importance_sample_GGX(xi, roughness4);
+			Vector3 light_vec = (2.0 * dir.z * dir - Vector3(0.0, 0.0, 1.0));
+
+			if (light_vec.z < 0.0) {
+				continue;
+			}
+
+			sample_directions[index * 4] = light_vec.x;
+			sample_directions[index * 4 + 1] = light_vec.y;
+			sample_directions[index * 4 + 2] = light_vec.z;
+
+			float D = distribution_GGX(dir.z, roughness4);
+			float pdf = D * dir.z / (4.0 * dir.z) + 0.0001;
+
+			float solid_angle_sample = 1.0 / (float(sample_count) * pdf + 0.0001);
+
+			float mip_level = MAX(0.5 * log2(solid_angle_sample / solid_angle_texel) + float(MAX(1, p_layer - 3)), 1.0);
+
+			sample_directions[index * 4 + 3] = mip_level;
+			weight += light_vec.z;
+			index++;
+		}
+
+		glUniform4fv(cubemap_filter.shader.version_get_uniform(CubemapFilterShaderGLES3::SAMPLE_DIRECTIONS_MIP, cubemap_filter.shader_version, mode), sample_count, sample_directions.ptr());
+		cubemap_filter.shader.version_set_uniform(CubemapFilterShaderGLES3::WEIGHT, weight, cubemap_filter.shader_version, mode);
+		cubemap_filter.shader.version_set_uniform(CubemapFilterShaderGLES3::SAMPLE_COUNT, index, cubemap_filter.shader_version, mode);
+	}
+
+	for (int i = 0; i < 6; i++) {
+		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_CUBE_MAP_POSITIVE_X + i, p_dest_cubemap, p_layer);
+#ifdef DEBUG_ENABLED
+		GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
+		if (status != GL_FRAMEBUFFER_COMPLETE) {
+			WARN_PRINT("Could not bind sky radiance face: " + itos(i) + ", status: " + GLES3::TextureStorage::get_singleton()->get_framebuffer_error(status));
+		}
+#endif
+		cubemap_filter.shader.version_set_uniform(CubemapFilterShaderGLES3::FACE_ID, i, cubemap_filter.shader_version, mode);
+
+		glDrawArrays(GL_TRIANGLES, 0, 3);
+	}
+	glBindVertexArray(0);
+	glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo);
+}
+
+#endif // GLES3_ENABLED
diff --git a/drivers/gles3/effects/cubemap_filter.h b/drivers/gles3/effects/cubemap_filter.h
new file mode 100644
index 0000000000..eaaa6f4075
--- /dev/null
+++ b/drivers/gles3/effects/cubemap_filter.h
@@ -0,0 +1,70 @@
+/**************************************************************************/
+/*  cubemap_filter.h                                                      */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifndef CUBEMAP_FILTER_GLES3_H
+#define CUBEMAP_FILTER_GLES3_H
+
+#ifdef GLES3_ENABLED
+
+#include "drivers/gles3/shaders/effects/cubemap_filter.glsl.gen.h"
+
+namespace GLES3 {
+
+class CubemapFilter {
+private:
+	struct CMF {
+		CubemapFilterShaderGLES3 shader;
+		RID shader_version;
+	} cubemap_filter;
+
+	static CubemapFilter *singleton;
+
+	// Use for full-screen effects. Slightly more efficient than screen_quad as this eliminates pixel overdraw along the diagonal.
+	GLuint screen_triangle = 0;
+	GLuint screen_triangle_array = 0;
+
+	uint32_t ggx_samples = 128;
+
+public:
+	static CubemapFilter *get_singleton() {
+		return singleton;
+	}
+
+	CubemapFilter();
+	~CubemapFilter();
+
+	void filter_radiance(GLuint p_source_cubemap, GLuint p_dest_cubemap, GLuint p_dest_framebuffer, int p_source_size, int p_mipmap_count, int p_layer);
+};
+
+} //namespace GLES3
+
+#endif // GLES3_ENABLED
+
+#endif // CUBEMAP_FILTER_GLES3_H
diff --git a/drivers/gles3/rasterizer_gles3.cpp b/drivers/gles3/rasterizer_gles3.cpp
index cee4f93b3d..767a394ce5 100644
--- a/drivers/gles3/rasterizer_gles3.cpp
+++ b/drivers/gles3/rasterizer_gles3.cpp
@@ -208,6 +208,7 @@ void RasterizerGLES3::finalize() {
 	memdelete(fog);
 	memdelete(post_effects);
 	memdelete(glow);
+	memdelete(cubemap_filter);
 	memdelete(copy_effects);
 	memdelete(light_storage);
 	memdelete(particles_storage);
@@ -354,6 +355,7 @@ RasterizerGLES3::RasterizerGLES3() {
 	particles_storage = memnew(GLES3::ParticlesStorage);
 	light_storage = memnew(GLES3::LightStorage);
 	copy_effects = memnew(GLES3::CopyEffects);
+	cubemap_filter = memnew(GLES3::CubemapFilter);
 	glow = memnew(GLES3::Glow);
 	post_effects = memnew(GLES3::PostEffects);
 	gi = memnew(GLES3::GI);
diff --git a/drivers/gles3/rasterizer_gles3.h b/drivers/gles3/rasterizer_gles3.h
index 8d52dc2365..09d3c7bd52 100644
--- a/drivers/gles3/rasterizer_gles3.h
+++ b/drivers/gles3/rasterizer_gles3.h
@@ -34,6 +34,7 @@
 #ifdef GLES3_ENABLED
 
 #include "effects/copy_effects.h"
+#include "effects/cubemap_filter.h"
 #include "effects/glow.h"
 #include "effects/post_effects.h"
 #include "environment/fog.h"
@@ -70,6 +71,7 @@ protected:
 	GLES3::GI *gi = nullptr;
 	GLES3::Fog *fog = nullptr;
 	GLES3::CopyEffects *copy_effects = nullptr;
+	GLES3::CubemapFilter *cubemap_filter = nullptr;
 	GLES3::Glow *glow = nullptr;
 	GLES3::PostEffects *post_effects = nullptr;
 	RasterizerCanvasGLES3 *canvas = nullptr;
diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp
index b8cc3928eb..ecb563214c 100644
--- a/drivers/gles3/rasterizer_scene_gles3.cpp
+++ b/drivers/gles3/rasterizer_scene_gles3.cpp
@@ -65,7 +65,7 @@ RenderGeometryInstance *RasterizerSceneGLES3::geometry_instance_create(RID p_bas
 }
 
 uint32_t RasterizerSceneGLES3::geometry_instance_get_pair_mask() {
-	return (1 << RS::INSTANCE_LIGHT);
+	return ((1 << RS::INSTANCE_LIGHT) | (1 << RS::INSTANCE_REFLECTION_PROBE));
 }
 
 void RasterizerSceneGLES3::GeometryInstanceGLES3::pair_light_instances(const RID *p_light_instances, uint32_t p_light_instance_count) {
@@ -97,6 +97,14 @@ void RasterizerSceneGLES3::GeometryInstanceGLES3::pair_light_instances(const RID
 	}
 }
 
+void RasterizerSceneGLES3::GeometryInstanceGLES3::pair_reflection_probe_instances(const RID *p_reflection_probe_instances, uint32_t p_reflection_probe_instance_count) {
+	paired_reflection_probes.clear();
+
+	for (uint32_t i = 0; i < p_reflection_probe_instance_count; i++) {
+		paired_reflection_probes.push_back(p_reflection_probe_instances[i]);
+	}
+}
+
 void RasterizerSceneGLES3::geometry_instance_free(RenderGeometryInstance *p_geometry_instance) {
 	GeometryInstanceGLES3 *ginstance = static_cast<GeometryInstanceGLES3 *>(p_geometry_instance);
 	ERR_FAIL_NULL(ginstance);
@@ -854,6 +862,7 @@ void RasterizerSceneGLES3::_draw_sky(RID p_env, const Projection &p_projection,
 }
 
 void RasterizerSceneGLES3::_update_sky_radiance(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_sky_energy_multiplier) {
+	GLES3::CubemapFilter *cubemap_filter = GLES3::CubemapFilter::get_singleton();
 	GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton();
 	ERR_FAIL_COND(p_env.is_null());
 
@@ -970,10 +979,10 @@ void RasterizerSceneGLES3::_update_sky_radiance(RID p_env, const Projection &p_p
 
 		if (update_single_frame) {
 			for (int i = 0; i < max_processing_layer; i++) {
-				_filter_sky_radiance(sky, i);
+				cubemap_filter->filter_radiance(sky->raw_radiance, sky->radiance, sky->radiance_framebuffer, sky->radiance_size, sky->mipmap_count, i);
 			}
 		} else {
-			_filter_sky_radiance(sky, 0); //Just copy over the first mipmap
+			cubemap_filter->filter_radiance(sky->raw_radiance, sky->radiance, sky->radiance_framebuffer, sky->radiance_size, sky->mipmap_count, 0); // Just copy over the first mipmap.
 		}
 		sky->processing_layer = 1;
 		sky->baked_exposure = p_sky_energy_multiplier;
@@ -984,135 +993,11 @@ void RasterizerSceneGLES3::_update_sky_radiance(RID p_env, const Projection &p_p
 			scene_state.set_gl_cull_mode(GLES3::SceneShaderData::CULL_DISABLED);
 			scene_state.enable_gl_blend(false);
 
-			_filter_sky_radiance(sky, sky->processing_layer);
+			cubemap_filter->filter_radiance(sky->raw_radiance, sky->radiance, sky->radiance_framebuffer, sky->radiance_size, sky->mipmap_count, sky->processing_layer);
 			sky->processing_layer++;
 		}
 	}
-}
-
-// Helper functions for IBL filtering
-
-Vector3 importance_sample_GGX(Vector2 xi, float roughness4) {
-	// Compute distribution direction
-	float phi = 2.0 * Math_PI * xi.x;
-	float cos_theta = sqrt((1.0 - xi.y) / (1.0 + (roughness4 - 1.0) * xi.y));
-	float sin_theta = sqrt(1.0 - cos_theta * cos_theta);
-
-	// Convert to spherical direction
-	Vector3 half_vector;
-	half_vector.x = sin_theta * cos(phi);
-	half_vector.y = sin_theta * sin(phi);
-	half_vector.z = cos_theta;
-
-	return half_vector;
-}
-
-float distribution_GGX(float NdotH, float roughness4) {
-	float NdotH2 = NdotH * NdotH;
-	float denom = (NdotH2 * (roughness4 - 1.0) + 1.0);
-	denom = Math_PI * denom * denom;
-
-	return roughness4 / denom;
-}
-
-float radical_inverse_vdC(uint32_t bits) {
-	bits = (bits << 16) | (bits >> 16);
-	bits = ((bits & 0x55555555) << 1) | ((bits & 0xAAAAAAAA) >> 1);
-	bits = ((bits & 0x33333333) << 2) | ((bits & 0xCCCCCCCC) >> 2);
-	bits = ((bits & 0x0F0F0F0F) << 4) | ((bits & 0xF0F0F0F0) >> 4);
-	bits = ((bits & 0x00FF00FF) << 8) | ((bits & 0xFF00FF00) >> 8);
-
-	return float(bits) * 2.3283064365386963e-10;
-}
-
-Vector2 hammersley(uint32_t i, uint32_t N) {
-	return Vector2(float(i) / float(N), radical_inverse_vdC(i));
-}
-
-void RasterizerSceneGLES3::_filter_sky_radiance(Sky *p_sky, int p_base_layer) {
-	GLES3::MaterialStorage *material_storage = GLES3::MaterialStorage::get_singleton();
-
-	glActiveTexture(GL_TEXTURE0);
-	glBindTexture(GL_TEXTURE_CUBE_MAP, p_sky->raw_radiance);
-	glBindFramebuffer(GL_FRAMEBUFFER, p_sky->radiance_framebuffer);
-
-	CubemapFilterShaderGLES3::ShaderVariant mode = CubemapFilterShaderGLES3::MODE_DEFAULT;
-
-	if (p_base_layer == 0) {
-		glGenerateMipmap(GL_TEXTURE_CUBE_MAP);
-		// Copy over base layer without filtering.
-		mode = CubemapFilterShaderGLES3::MODE_COPY;
-	}
-
-	int size = p_sky->radiance_size >> p_base_layer;
-	glViewport(0, 0, size, size);
-	glBindVertexArray(sky_globals.screen_triangle_array);
-
-	bool success = material_storage->shaders.cubemap_filter_shader.version_bind_shader(scene_globals.cubemap_filter_shader_version, mode);
-	if (!success) {
-		return;
-	}
-
-	if (p_base_layer > 0) {
-		const uint32_t sample_counts[4] = { 1, sky_globals.ggx_samples / 4, sky_globals.ggx_samples / 2, sky_globals.ggx_samples };
-		uint32_t sample_count = sample_counts[MIN(3, p_base_layer)];
-
-		float roughness = float(p_base_layer) / (p_sky->mipmap_count);
-		float roughness4 = roughness * roughness;
-		roughness4 *= roughness4;
-
-		float solid_angle_texel = 4.0 * Math_PI / float(6 * size * size);
-
-		LocalVector<float> sample_directions;
-		sample_directions.resize(4 * sample_count);
-
-		uint32_t index = 0;
-		float weight = 0.0;
-		for (uint32_t i = 0; i < sample_count; i++) {
-			Vector2 xi = hammersley(i, sample_count);
-			Vector3 dir = importance_sample_GGX(xi, roughness4);
-			Vector3 light_vec = (2.0 * dir.z * dir - Vector3(0.0, 0.0, 1.0));
-
-			if (light_vec.z < 0.0) {
-				continue;
-			}
-
-			sample_directions[index * 4] = light_vec.x;
-			sample_directions[index * 4 + 1] = light_vec.y;
-			sample_directions[index * 4 + 2] = light_vec.z;
-
-			float D = distribution_GGX(dir.z, roughness4);
-			float pdf = D * dir.z / (4.0 * dir.z) + 0.0001;
-
-			float solid_angle_sample = 1.0 / (float(sample_count) * pdf + 0.0001);
-
-			float mip_level = MAX(0.5 * log2(solid_angle_sample / solid_angle_texel) + float(MAX(1, p_base_layer - 3)), 1.0);
-
-			sample_directions[index * 4 + 3] = mip_level;
-			weight += light_vec.z;
-			index++;
-		}
-
-		glUniform4fv(material_storage->shaders.cubemap_filter_shader.version_get_uniform(CubemapFilterShaderGLES3::SAMPLE_DIRECTIONS_MIP, scene_globals.cubemap_filter_shader_version, mode), sample_count, sample_directions.ptr());
-		material_storage->shaders.cubemap_filter_shader.version_set_uniform(CubemapFilterShaderGLES3::WEIGHT, weight, scene_globals.cubemap_filter_shader_version, mode);
-		material_storage->shaders.cubemap_filter_shader.version_set_uniform(CubemapFilterShaderGLES3::SAMPLE_COUNT, index, scene_globals.cubemap_filter_shader_version, mode);
-	}
-
-	for (int i = 0; i < 6; i++) {
-		glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_CUBE_MAP_POSITIVE_X + i, p_sky->radiance, p_base_layer);
-#ifdef DEBUG_ENABLED
-		GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
-		if (status != GL_FRAMEBUFFER_COMPLETE) {
-			WARN_PRINT("Could not bind sky radiance face: " + itos(i) + ", status: " + GLES3::TextureStorage::get_singleton()->get_framebuffer_error(status));
-		}
-#endif
-		material_storage->shaders.cubemap_filter_shader.version_set_uniform(CubemapFilterShaderGLES3::FACE_ID, i, scene_globals.cubemap_filter_shader_version, mode);
-
-		glDrawArrays(GL_TRIANGLES, 0, 3);
-	}
-	glBindVertexArray(0);
-	glViewport(0, 0, p_sky->screen_size.x, p_sky->screen_size.y);
-	glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo);
+	glViewport(0, 0, sky->screen_size.x, sky->screen_size.y);
 }
 
 Ref<Image> RasterizerSceneGLES3::sky_bake_panorama(RID p_sky, float p_energy, bool p_bake_irradiance, const Size2i &p_size) {
@@ -1334,6 +1219,7 @@ _FORCE_INLINE_ static uint32_t _indices_to_primitives(RS::PrimitiveType p_primit
 }
 void RasterizerSceneGLES3::_fill_render_list(RenderListType p_render_list, const RenderDataGLES3 *p_render_data, PassMode p_pass_mode, bool p_append) {
 	GLES3::MeshStorage *mesh_storage = GLES3::MeshStorage::get_singleton();
+	GLES3::LightStorage *light_storage = GLES3::LightStorage::get_singleton();
 
 	if (p_render_list == RENDER_LIST_OPAQUE) {
 		scene_state.used_screen_texture = false;
@@ -1392,22 +1278,24 @@ void RasterizerSceneGLES3::_fill_render_list(RenderListType p_render_list, const
 			inst->light_passes.clear();
 			inst->spot_light_gl_cache.clear();
 			inst->omni_light_gl_cache.clear();
+			inst->reflection_probes_local_transform_cache.clear();
+			inst->reflection_probe_rid_cache.clear();
 			uint64_t current_frame = RSG::rasterizer->get_frame_number();
 
 			if (inst->paired_omni_light_count) {
 				for (uint32_t j = 0; j < inst->paired_omni_light_count; j++) {
 					RID light_instance = inst->paired_omni_lights[j];
-					if (GLES3::LightStorage::get_singleton()->light_instance_get_render_pass(light_instance) != current_frame) {
+					if (light_storage->light_instance_get_render_pass(light_instance) != current_frame) {
 						continue;
 					}
-					RID light = GLES3::LightStorage::get_singleton()->light_instance_get_base_light(light_instance);
-					int32_t shadow_id = GLES3::LightStorage::get_singleton()->light_instance_get_shadow_id(light_instance);
+					RID light = light_storage->light_instance_get_base_light(light_instance);
+					int32_t shadow_id = light_storage->light_instance_get_shadow_id(light_instance);
 
-					if (GLES3::LightStorage::get_singleton()->light_has_shadow(light) && shadow_id >= 0) {
+					if (light_storage->light_has_shadow(light) && shadow_id >= 0) {
 						// Skip static lights when a lightmap is used.
-						if (!inst->lightmap_instance.is_valid() || GLES3::LightStorage::get_singleton()->light_get_bake_mode(light) != RenderingServer::LIGHT_BAKE_STATIC) {
+						if (!inst->lightmap_instance.is_valid() || light_storage->light_get_bake_mode(light) != RenderingServer::LIGHT_BAKE_STATIC) {
 							GeometryInstanceGLES3::LightPass pass;
-							pass.light_id = GLES3::LightStorage::get_singleton()->light_instance_get_gl_id(light_instance);
+							pass.light_id = light_storage->light_instance_get_gl_id(light_instance);
 							pass.shadow_id = shadow_id;
 							pass.light_instance_rid = light_instance;
 							pass.is_omni = true;
@@ -1415,7 +1303,7 @@ void RasterizerSceneGLES3::_fill_render_list(RenderListType p_render_list, const
 						}
 					} else {
 						// Lights without shadow can all go in base pass.
-						inst->omni_light_gl_cache.push_back((uint32_t)GLES3::LightStorage::get_singleton()->light_instance_get_gl_id(light_instance));
+						inst->omni_light_gl_cache.push_back((uint32_t)light_storage->light_instance_get_gl_id(light_instance));
 					}
 				}
 			}
@@ -1423,24 +1311,42 @@ void RasterizerSceneGLES3::_fill_render_list(RenderListType p_render_list, const
 			if (inst->paired_spot_light_count) {
 				for (uint32_t j = 0; j < inst->paired_spot_light_count; j++) {
 					RID light_instance = inst->paired_spot_lights[j];
-					if (GLES3::LightStorage::get_singleton()->light_instance_get_render_pass(light_instance) != current_frame) {
+					if (light_storage->light_instance_get_render_pass(light_instance) != current_frame) {
 						continue;
 					}
-					RID light = GLES3::LightStorage::get_singleton()->light_instance_get_base_light(light_instance);
-					int32_t shadow_id = GLES3::LightStorage::get_singleton()->light_instance_get_shadow_id(light_instance);
+					RID light = light_storage->light_instance_get_base_light(light_instance);
+					int32_t shadow_id = light_storage->light_instance_get_shadow_id(light_instance);
 
-					if (GLES3::LightStorage::get_singleton()->light_has_shadow(light) && shadow_id >= 0) {
+					if (light_storage->light_has_shadow(light) && shadow_id >= 0) {
 						// Skip static lights when a lightmap is used.
-						if (!inst->lightmap_instance.is_valid() || GLES3::LightStorage::get_singleton()->light_get_bake_mode(light) != RenderingServer::LIGHT_BAKE_STATIC) {
+						if (!inst->lightmap_instance.is_valid() || light_storage->light_get_bake_mode(light) != RenderingServer::LIGHT_BAKE_STATIC) {
 							GeometryInstanceGLES3::LightPass pass;
-							pass.light_id = GLES3::LightStorage::get_singleton()->light_instance_get_gl_id(light_instance);
+							pass.light_id = light_storage->light_instance_get_gl_id(light_instance);
 							pass.shadow_id = shadow_id;
 							pass.light_instance_rid = light_instance;
 							inst->light_passes.push_back(pass);
 						}
 					} else {
 						// Lights without shadow can all go in base pass.
-						inst->spot_light_gl_cache.push_back((uint32_t)GLES3::LightStorage::get_singleton()->light_instance_get_gl_id(light_instance));
+						inst->spot_light_gl_cache.push_back((uint32_t)light_storage->light_instance_get_gl_id(light_instance));
+					}
+				}
+			}
+
+			if (p_render_data->reflection_probe.is_null() && inst->paired_reflection_probes.size() > 0) {
+				// Do not include if we're rendering reflection probes.
+				// We only support two probes for now and we handle them first come, first serve.
+				// This should be improved one day, at minimum the list should be sorted by priority.
+
+				for (uint32_t pi = 0; pi < inst->paired_reflection_probes.size(); pi++) {
+					RID probe_instance = inst->paired_reflection_probes[pi];
+					RID atlas = light_storage->reflection_probe_instance_get_atlas(probe_instance);
+					RID probe = light_storage->reflection_probe_instance_get_probe(probe_instance);
+					uint32_t reflection_mask = light_storage->reflection_probe_get_reflection_mask(probe);
+					if (atlas.is_valid() && (inst->layer_mask & reflection_mask)) {
+						Transform3D local_matrix = p_render_data->inv_cam_transform * light_storage->reflection_probe_instance_get_transform(probe_instance);
+						inst->reflection_probes_local_transform_cache.push_back(local_matrix.affine_inverse());
+						inst->reflection_probe_rid_cache.push_back(probe_instance);
 					}
 				}
 			}
@@ -2321,20 +2227,21 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
 	RENDER_TIMESTAMP("Setup 3D Scene");
 
 	bool apply_color_adjustments_in_post = false;
+	bool is_reflection_probe = p_reflection_probe.is_valid();
 
-	Ref<RenderSceneBuffersGLES3> rb;
-	if (p_render_buffers.is_valid()) {
-		rb = p_render_buffers;
-		ERR_FAIL_COND(rb.is_null());
+	Ref<RenderSceneBuffersGLES3> rb = p_render_buffers;
+	ERR_FAIL_COND(rb.is_null());
 
-		if (rb->get_scaling_3d_mode() != RS::VIEWPORT_SCALING_3D_MODE_OFF) {
-			// If we're scaling, we apply tonemapping etc. in post, so disable it during rendering
-			apply_color_adjustments_in_post = true;
-		}
+	if (rb->get_scaling_3d_mode() != RS::VIEWPORT_SCALING_3D_MODE_OFF) {
+		// If we're scaling, we apply tonemapping etc. in post, so disable it during rendering
+		apply_color_adjustments_in_post = true;
 	}
 
-	GLES3::RenderTarget *rt = texture_storage->get_render_target(rb->render_target);
-	ERR_FAIL_NULL(rt);
+	GLES3::RenderTarget *rt = nullptr; // No render target for reflection probe
+	if (!is_reflection_probe) {
+		rt = texture_storage->get_render_target(rb->render_target);
+		ERR_FAIL_NULL(rt);
+	}
 
 	bool glow_enabled = false;
 	if (p_environment.is_valid() && rb.is_valid()) {
@@ -2351,7 +2258,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
 	RenderDataGLES3 render_data;
 	{
 		render_data.render_buffers = rb;
-		render_data.transparent_bg = rb.is_valid() ? rt->is_transparent : false;
+		render_data.transparent_bg = rt ? rt->is_transparent : false;
 		// Our first camera is used by default
 		render_data.cam_transform = p_camera_data->main_transform;
 		render_data.inv_cam_transform = render_data.cam_transform.affine_inverse();
@@ -2381,7 +2288,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
 		// this should be the same for all cameras..
 		render_data.lod_distance_multiplier = p_camera_data->main_projection.get_lod_multiplier();
 
-		if (rt->color_type == GL_UNSIGNED_INT_2_10_10_10_REV && glow_enabled) {
+		if (rt != nullptr && rt->color_type == GL_UNSIGNED_INT_2_10_10_10_REV && glow_enabled) {
 			// As our output is in sRGB and we're using 10bit color space, we can fake a little HDR to do glow...
 			render_data.luminance_multiplier = 0.25;
 		} else {
@@ -2415,7 +2322,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
 	glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_GLOBALS_UNIFORM_LOCATION, global_buffer);
 
 	Color clear_color;
-	if (p_render_buffers.is_valid()) {
+	if (!is_reflection_probe && rb->render_target.is_valid()) {
 		clear_color = texture_storage->render_target_get_clear_request_color(rb->render_target);
 	} else {
 		clear_color = texture_storage->get_default_clear_color();
@@ -2448,9 +2355,9 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
 
 	scene_state.ubo.emissive_exposure_normalization = -1.0; // Use default exposure normalization.
 
-	bool flip_y = !render_data.reflection_probe.is_valid();
+	bool flip_y = !is_reflection_probe;
 
-	if (rt->overridden.color.is_valid()) {
+	if (rt && rt->overridden.color.is_valid()) {
 		// If we've overridden the render target's color texture, then don't render upside down.
 		// We're probably rendering directly to an XR device.
 		flip_y = false;
@@ -2462,7 +2369,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
 	_render_shadows(&render_data, screen_size);
 
 	_setup_lights(&render_data, true, render_data.directional_light_count, render_data.omni_light_count, render_data.spot_light_count, render_data.directional_shadow_count);
-	_setup_environment(&render_data, render_data.reflection_probe.is_valid(), screen_size, flip_y, clear_color, false);
+	_setup_environment(&render_data, is_reflection_probe, screen_size, flip_y, clear_color, false);
 
 	_fill_render_list(RENDER_LIST_OPAQUE, &render_data, PASS_MODE_COLOR);
 	render_list[RENDER_LIST_OPAQUE].sort_by_key();
@@ -2522,7 +2429,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
 		if (draw_sky || draw_sky_fog_only || environment_get_reflection_source(render_data.environment) == RS::ENV_REFLECTION_SOURCE_SKY || environment_get_ambient_source(render_data.environment) == RS::ENV_AMBIENT_SOURCE_SKY) {
 			RENDER_TIMESTAMP("Setup Sky");
 			Projection projection = render_data.cam_projection;
-			if (render_data.reflection_probe.is_valid()) {
+			if (is_reflection_probe) {
 				Projection correction;
 				correction.set_depth_correction(true, true, false);
 				projection = correction * render_data.cam_projection;
@@ -2543,7 +2450,12 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
 		}
 	}
 
-	GLuint fbo = rb->get_render_fbo();
+	GLuint fbo = 0;
+	if (is_reflection_probe) {
+		fbo = GLES3::LightStorage::get_singleton()->reflection_probe_instance_get_framebuffer(render_data.reflection_probe, render_data.reflection_probe_pass);
+	} else {
+		fbo = rb->get_render_fbo();
+	}
 
 	glBindFramebuffer(GL_FRAMEBUFFER, fbo);
 	glViewport(0, 0, rb->internal_size.x, rb->internal_size.y);
@@ -2664,10 +2576,17 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
 		scene_state.enable_gl_blend(false);
 		scene_state.set_gl_cull_mode(GLES3::SceneShaderData::CULL_BACK);
 
-		_draw_sky(render_data.environment, render_data.cam_projection, render_data.cam_transform, sky_energy_multiplier, render_data.luminance_multiplier, p_camera_data->view_count > 1, flip_y, apply_color_adjustments_in_post);
+		Projection projection = render_data.cam_projection;
+		if (is_reflection_probe) {
+			Projection correction;
+			correction.columns[1][1] = -1.0;
+			projection = correction * render_data.cam_projection;
+		}
+
+		_draw_sky(render_data.environment, projection, render_data.cam_transform, sky_energy_multiplier, render_data.luminance_multiplier, p_camera_data->view_count > 1, flip_y, apply_color_adjustments_in_post);
 	}
 
-	if (scene_state.used_screen_texture || scene_state.used_depth_texture) {
+	if (rt && (scene_state.used_screen_texture || scene_state.used_depth_texture)) {
 		Size2i size;
 		GLuint backbuffer_fbo = 0;
 		GLuint backbuffer = 0;
@@ -2725,7 +2644,7 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
 		glFrontFace(GL_CCW);
 	}
 
-	if (rb.is_valid()) {
+	if (!is_reflection_probe && rb.is_valid()) {
 		_render_buffers_debug_draw(rb, p_shadow_atlas, fbo);
 	}
 
@@ -2733,9 +2652,11 @@ void RasterizerSceneGLES3::render_scene(const Ref<RenderSceneBuffers> &p_render_
 	scene_state.reset_gl_state();
 	glUseProgram(0);
 
-	_render_post_processing(&render_data);
+	if (!is_reflection_probe) {
+		_render_post_processing(&render_data);
 
-	texture_storage->render_target_disable_clear_request(rb->render_target);
+		texture_storage->render_target_disable_clear_request(rb->render_target);
+	}
 
 	glActiveTexture(GL_TEXTURE0);
 }
@@ -3203,6 +3124,14 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params,
 						spec_constants |= SceneShaderGLES3::DISABLE_LIGHT_DIRECTIONAL;
 					}
 
+					if (inst->reflection_probe_rid_cache.size() == 0) {
+						// We don't have any probes.
+						spec_constants |= SceneShaderGLES3::DISABLE_REFLECTION_PROBE;
+					} else if (inst->reflection_probe_rid_cache.size() > 1) {
+						// We have a second probe.
+						spec_constants |= SceneShaderGLES3::SECOND_REFLECTION_PROBE;
+					}
+
 					if (inst->lightmap_instance.is_valid()) {
 						spec_constants |= SceneShaderGLES3::USE_LIGHTMAP;
 
@@ -3224,6 +3153,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params,
 					spec_constants |= SceneShaderGLES3::DISABLE_LIGHT_SPOT;
 					spec_constants |= SceneShaderGLES3::DISABLE_LIGHT_DIRECTIONAL;
 					spec_constants |= SceneShaderGLES3::DISABLE_LIGHTMAP;
+					spec_constants |= SceneShaderGLES3::DISABLE_REFLECTION_PROBE;
 				}
 
 				if (uses_additive_lighting) {
@@ -3383,6 +3313,52 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params,
 				}
 			}
 
+			// Pass in reflection probe data
+			if constexpr (p_pass_mode == PASS_MODE_COLOR || p_pass_mode == PASS_MODE_COLOR_TRANSPARENT) {
+				if (pass == 0 && inst->reflection_probe_rid_cache.size() > 0) {
+					GLES3::Config *config = GLES3::Config::get_singleton();
+					GLES3::LightStorage *light_storage = GLES3::LightStorage::get_singleton();
+
+					// Setup first probe.
+					{
+						RID probe_rid = light_storage->reflection_probe_instance_get_probe(inst->reflection_probe_rid_cache[0]);
+						GLES3::ReflectionProbe *probe = light_storage->get_reflection_probe(probe_rid);
+
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE1_USE_BOX_PROJECT, probe->box_projection, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE1_BOX_EXTENTS, probe->size * 0.5, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE1_BOX_OFFSET, probe->origin_offset, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE1_EXTERIOR, !probe->interior, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE1_INTENSITY, probe->intensity, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE1_AMBIENT_MODE, int(probe->ambient_mode), shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE1_AMBIENT_COLOR, probe->ambient_color * probe->ambient_color_energy, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE1_LOCAL_MATRIX, inst->reflection_probes_local_transform_cache[0], shader->version, instance_variant, spec_constants);
+
+						glActiveTexture(GL_TEXTURE0 + config->max_texture_image_units - 7);
+						glBindTexture(GL_TEXTURE_CUBE_MAP, light_storage->reflection_probe_instance_get_texture(inst->reflection_probe_rid_cache[0]));
+					}
+
+					if (inst->reflection_probe_rid_cache.size() > 1) {
+						// Setup second probe.
+						RID probe_rid = light_storage->reflection_probe_instance_get_probe(inst->reflection_probe_rid_cache[1]);
+						GLES3::ReflectionProbe *probe = light_storage->get_reflection_probe(probe_rid);
+
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE2_USE_BOX_PROJECT, probe->box_projection, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE2_BOX_EXTENTS, probe->size * 0.5, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE2_BOX_OFFSET, probe->origin_offset, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE2_EXTERIOR, !probe->interior, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE2_INTENSITY, probe->intensity, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE2_AMBIENT_MODE, int(probe->ambient_mode), shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE2_AMBIENT_COLOR, probe->ambient_color * probe->ambient_color_energy, shader->version, instance_variant, spec_constants);
+						material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::REFPROBE2_LOCAL_MATRIX, inst->reflection_probes_local_transform_cache[1], shader->version, instance_variant, spec_constants);
+
+						glActiveTexture(GL_TEXTURE0 + config->max_texture_image_units - 8);
+						glBindTexture(GL_TEXTURE_CUBE_MAP, light_storage->reflection_probe_instance_get_texture(inst->reflection_probe_rid_cache[1]));
+
+						spec_constants |= SceneShaderGLES3::SECOND_REFLECTION_PROBE;
+					}
+				}
+			}
+
 			material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::WORLD_TRANSFORM, world_transform, shader->version, instance_variant, spec_constants);
 			{
 				GLES3::Mesh::Surface *s = reinterpret_cast<GLES3::Mesh::Surface *>(surf->surface);
@@ -4074,6 +4050,7 @@ RasterizerSceneGLES3::RasterizerSceneGLES3() {
 		global_defines += "\n#define MAX_LIGHT_DATA_STRUCTS " + itos(config->max_renderable_lights) + "\n";
 		global_defines += "\n#define MAX_DIRECTIONAL_LIGHT_DATA_STRUCTS " + itos(MAX_DIRECTIONAL_LIGHTS) + "\n";
 		global_defines += "\n#define MAX_FORWARD_LIGHTS " + itos(config->max_lights_per_object) + "u\n";
+		global_defines += "\n#define MAX_ROUGHNESS_LOD " + itos(sky_globals.roughness_layers - 1) + ".0\n";
 		material_storage->shaders.scene_shader.initialize(global_defines);
 		scene_globals.shader_default_version = material_storage->shaders.scene_shader.version_create();
 		material_storage->shaders.scene_shader.version_bind_shader(scene_globals.shader_default_version, SceneShaderGLES3::MODE_COLOR);
@@ -4129,7 +4106,6 @@ void fragment() {
 	{
 		// Initialize Sky stuff
 		sky_globals.roughness_layers = GLOBAL_GET("rendering/reflections/sky_reflections/roughness_layers");
-		sky_globals.ggx_samples = GLOBAL_GET("rendering/reflections/sky_reflections/ggx_samples");
 
 		String global_defines;
 		global_defines += "#define MAX_GLOBAL_SHADER_UNIFORMS 256\n"; // TODO: this is arbitrary for now
@@ -4139,13 +4115,6 @@ void fragment() {
 	}
 
 	{
-		String global_defines;
-		global_defines += "\n#define MAX_SAMPLE_COUNT " + itos(sky_globals.ggx_samples) + "\n";
-		material_storage->shaders.cubemap_filter_shader.initialize(global_defines);
-		scene_globals.cubemap_filter_shader_version = material_storage->shaders.cubemap_filter_shader.version_create();
-	}
-
-	{
 		sky_globals.default_shader = material_storage->shader_allocate();
 
 		material_storage->shader_initialize(sky_globals.default_shader);
@@ -4234,7 +4203,6 @@ RasterizerSceneGLES3::~RasterizerSceneGLES3() {
 
 	// Scene Shader
 	GLES3::MaterialStorage::get_singleton()->shaders.scene_shader.version_free(scene_globals.shader_default_version);
-	GLES3::MaterialStorage::get_singleton()->shaders.cubemap_filter_shader.version_free(scene_globals.cubemap_filter_shader_version);
 	RSG::material_storage->material_free(scene_globals.default_material);
 	RSG::material_storage->shader_free(scene_globals.default_shader);
 
@@ -4250,7 +4218,6 @@ RasterizerSceneGLES3::~RasterizerSceneGLES3() {
 	RSG::material_storage->shader_free(sky_globals.fog_shader);
 	GLES3::Utilities::get_singleton()->buffer_free_data(sky_globals.screen_triangle);
 	glDeleteVertexArrays(1, &sky_globals.screen_triangle_array);
-	glDeleteTextures(1, &sky_globals.radical_inverse_vdc_cache_tex);
 	GLES3::Utilities::get_singleton()->buffer_free_data(sky_globals.directional_light_buffer);
 	memdelete_arr(sky_globals.directional_lights);
 	memdelete_arr(sky_globals.last_frame_directional_lights);
diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h
index 71cd152520..cc479bd4e9 100644
--- a/drivers/gles3/rasterizer_scene_gles3.h
+++ b/drivers/gles3/rasterizer_scene_gles3.h
@@ -37,7 +37,7 @@
 #include "core/templates/paged_allocator.h"
 #include "core/templates/rid_owner.h"
 #include "core/templates/self_list.h"
-#include "drivers/gles3/shaders/cubemap_filter.glsl.gen.h"
+#include "drivers/gles3/shaders/effects/cubemap_filter.glsl.gen.h"
 #include "drivers/gles3/shaders/sky.glsl.gen.h"
 #include "scene/resources/mesh.h"
 #include "servers/rendering/renderer_compositor.h"
@@ -157,7 +157,6 @@ private:
 		RID shader_default_version;
 		RID default_material;
 		RID default_shader;
-		RID cubemap_filter_shader_version;
 		RID overdraw_material;
 		RID overdraw_shader;
 	} scene_globals;
@@ -314,6 +313,10 @@ private:
 		LocalVector<uint32_t> omni_light_gl_cache;
 		LocalVector<uint32_t> spot_light_gl_cache;
 
+		LocalVector<RID> paired_reflection_probes;
+		LocalVector<RID> reflection_probe_rid_cache;
+		LocalVector<Transform3D> reflection_probes_local_transform_cache;
+
 		RID lightmap_instance;
 		Rect2 lightmap_uv_scale;
 		uint32_t lightmap_slice_index;
@@ -331,7 +334,7 @@ private:
 		virtual void set_lightmap_capture(const Color *p_sh9) override;
 
 		virtual void pair_light_instances(const RID *p_light_instances, uint32_t p_light_instance_count) override;
-		virtual void pair_reflection_probe_instances(const RID *p_reflection_probe_instances, uint32_t p_reflection_probe_instance_count) override {}
+		virtual void pair_reflection_probe_instances(const RID *p_reflection_probe_instances, uint32_t p_reflection_probe_instance_count) override;
 		virtual void pair_decal_instances(const RID *p_decal_instances, uint32_t p_decal_instance_count) override {}
 		virtual void pair_voxel_gi_instances(const RID *p_voxel_gi_instances, uint32_t p_voxel_gi_instance_count) override {}
 
@@ -686,10 +689,8 @@ protected:
 		RID fog_shader;
 		GLuint screen_triangle = 0;
 		GLuint screen_triangle_array = 0;
-		GLuint radical_inverse_vdc_cache_tex = 0;
 		uint32_t max_directional_lights = 4;
 		uint32_t roughness_layers = 8;
-		uint32_t ggx_samples = 128;
 	} sky_globals;
 
 	struct Sky {
@@ -733,7 +734,6 @@ protected:
 	void _invalidate_sky(Sky *p_sky);
 	void _update_dirty_skys();
 	void _update_sky_radiance(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_sky_energy_multiplier);
-	void _filter_sky_radiance(Sky *p_sky, int p_base_layer);
 	void _draw_sky(RID p_env, const Projection &p_projection, const Transform3D &p_transform, float p_sky_energy_multiplier, float p_luminance_multiplier, bool p_use_multiview, bool p_flip_y, bool p_apply_color_adjustments_in_post);
 	void _free_sky_data(Sky *p_sky);
 
diff --git a/drivers/gles3/shaders/SCsub b/drivers/gles3/shaders/SCsub
index 0292b5d519..e70912cb4d 100644
--- a/drivers/gles3/shaders/SCsub
+++ b/drivers/gles3/shaders/SCsub
@@ -18,7 +18,6 @@ if "GLES3_GLSL" in env["BUILDERS"]:
     env.GLES3_GLSL("canvas.glsl")
     env.GLES3_GLSL("scene.glsl")
     env.GLES3_GLSL("sky.glsl")
-    env.GLES3_GLSL("cubemap_filter.glsl")
     env.GLES3_GLSL("canvas_occlusion.glsl")
     env.GLES3_GLSL("canvas_sdf.glsl")
     env.GLES3_GLSL("particles.glsl")
diff --git a/drivers/gles3/shaders/cubemap_filter.glsl b/drivers/gles3/shaders/effects/cubemap_filter.glsl
index 6fcb23204d..6fcb23204d 100644
--- a/drivers/gles3/shaders/cubemap_filter.glsl
+++ b/drivers/gles3/shaders/effects/cubemap_filter.glsl
diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl
index 8bf844991d..36bbca8728 100644
--- a/drivers/gles3/shaders/scene.glsl
+++ b/drivers/gles3/shaders/scene.glsl
@@ -12,6 +12,7 @@ DISABLE_LIGHTMAP = false
 DISABLE_LIGHT_DIRECTIONAL = false
 DISABLE_LIGHT_OMNI = false
 DISABLE_LIGHT_SPOT = false
+DISABLE_REFLECTION_PROBE = true
 DISABLE_FOG = false
 USE_DEPTH_FOG = false
 USE_RADIANCE_MAP = true
@@ -34,6 +35,7 @@ APPLY_TONEMAPPING = true
 ADDITIVE_OMNI = false
 ADDITIVE_SPOT = false
 RENDER_MATERIAL = false
+SECOND_REFLECTION_PROBE = false
 
 
 #[vertex]
@@ -568,8 +570,11 @@ void main() {
 1-color correction // In tonemap_inc.glsl
 2-radiance
 3-shadow
+4-lightmap textures
 5-screen
 6-depth
+7-reflection probe 1
+8-reflection probe 2
 
 */
 
@@ -626,7 +631,39 @@ in highp vec4 shadow_coord4;
 
 uniform samplerCube radiance_map; // texunit:-2
 
-#endif
+#endif // USE_RADIANCE_MAP
+
+#ifndef DISABLE_REFLECTION_PROBE
+
+#define REFLECTION_PROBE_MAX_LOD 8.0
+
+uniform bool refprobe1_use_box_project;
+uniform highp vec3 refprobe1_box_extents;
+uniform vec3 refprobe1_box_offset;
+uniform highp mat4 refprobe1_local_matrix;
+uniform bool refprobe1_exterior;
+uniform float refprobe1_intensity;
+uniform int refprobe1_ambient_mode;
+uniform vec4 refprobe1_ambient_color;
+
+uniform samplerCube refprobe1_texture; // texunit:-7
+
+#ifdef SECOND_REFLECTION_PROBE
+
+uniform bool refprobe2_use_box_project;
+uniform highp vec3 refprobe2_box_extents;
+uniform vec3 refprobe2_box_offset;
+uniform highp mat4 refprobe2_local_matrix;
+uniform bool refprobe2_exterior;
+uniform float refprobe2_intensity;
+uniform int refprobe2_ambient_mode;
+uniform vec4 refprobe2_ambient_color;
+
+uniform samplerCube refprobe2_texture; // texunit:-8
+
+#endif // SECOND_REFLECTION_PROBE
+
+#endif // DISABLE_REFLECTION_PROBE
 
 layout(std140) uniform GlobalShaderUniformData { //ubo:1
 	vec4 global_shader_uniforms[MAX_GLOBAL_SHADER_UNIFORMS];
@@ -1289,6 +1326,90 @@ vec4 fog_process(vec3 vertex) {
 	return vec4(fog_color, fog_amount);
 }
 
+#ifndef DISABLE_REFLECTION_PROBE
+
+#define REFLECTION_AMBIENT_DISABLED 0
+#define REFLECTION_AMBIENT_ENVIRONMENT 1
+#define REFLECTION_AMBIENT_COLOR 2
+
+void reflection_process(samplerCube reflection_map,
+		vec3 normal, vec3 vertex,
+		mat4 local_matrix,
+		bool use_box_project, vec3 box_extents, vec3 box_offset,
+		bool exterior, float intensity, int ref_ambient_mode, vec4 ref_ambient_color,
+		float roughness, vec3 ambient, vec3 skybox,
+		inout highp vec4 reflection_accum, inout highp vec4 ambient_accum) {
+	vec4 reflection;
+
+	vec3 local_pos = (local_matrix * vec4(vertex, 1.0)).xyz;
+
+	if (any(greaterThan(abs(local_pos), box_extents))) { //out of the reflection box
+		return;
+	}
+
+	vec3 inner_pos = abs(local_pos / box_extents);
+	float blend = max(inner_pos.x, max(inner_pos.y, inner_pos.z));
+	blend = mix(length(inner_pos), blend, blend);
+	blend *= blend;
+	blend = max(0.0, 1.0 - blend);
+
+	//reflect and make local
+	vec3 ref_normal = normalize(reflect(vertex, normal));
+	ref_normal = (local_matrix * vec4(ref_normal, 0.0)).xyz;
+
+	if (use_box_project) { //box project
+
+		vec3 nrdir = normalize(ref_normal);
+		vec3 rbmax = (box_extents - local_pos) / nrdir;
+		vec3 rbmin = (-box_extents - local_pos) / nrdir;
+
+		vec3 rbminmax = mix(rbmin, rbmax, vec3(greaterThan(nrdir, vec3(0.0, 0.0, 0.0))));
+
+		float fa = min(min(rbminmax.x, rbminmax.y), rbminmax.z);
+		vec3 posonbox = local_pos + nrdir * fa;
+		ref_normal = posonbox - box_offset.xyz;
+	}
+
+	reflection.rgb = srgb_to_linear(textureLod(reflection_map, ref_normal, roughness * MAX_ROUGHNESS_LOD).rgb);
+
+	if (exterior) {
+		reflection.rgb = mix(skybox, reflection.rgb, blend);
+	}
+	reflection.rgb *= intensity;
+	reflection.a = blend;
+	reflection.rgb *= blend;
+
+	reflection_accum += reflection;
+
+#ifndef USE_LIGHTMAP
+	if (ref_ambient_mode == REFLECTION_AMBIENT_ENVIRONMENT) {
+		vec4 ambient_out;
+		vec3 amb_normal = (local_matrix * vec4(normal, 0.0)).xyz;
+
+		ambient_out.rgb = srgb_to_linear(textureLod(reflection_map, amb_normal, MAX_ROUGHNESS_LOD).rgb);
+		if (exterior) {
+			ambient_out.rgb = mix(ambient, ambient_out.rgb, blend);
+		}
+
+		ambient_out.a = blend;
+		ambient_out.rgb *= blend;
+		ambient_accum += ambient_out;
+	} else if (ref_ambient_mode == REFLECTION_AMBIENT_COLOR) {
+		vec4 ambient_out;
+		ambient_out.rgb = ref_ambient_color.rgb;
+		if (exterior) {
+			ambient_out.rgb = mix(ambient, ambient_out.rgb, blend);
+		}
+
+		ambient_out.a = blend;
+		ambient_out.rgb *= blend;
+		ambient_accum += ambient_out;
+	}
+#endif // USE_LIGHTMAP
+}
+
+#endif // DISABLE_REFLECTION_PROBE
+
 #endif // !MODE_RENDER_DEPTH
 
 void main() {
@@ -1489,9 +1610,33 @@ void main() {
 		specular_light *= horizon * horizon;
 		specular_light *= scene_data.ambient_light_color_energy.a;
 	}
-#endif
+#endif // USE_RADIANCE_MAP
 
 	// Calculate Reflection probes
+#ifndef DISABLE_REFLECTION_PROBE
+	vec4 ambient_accum = vec4(0.0);
+	{
+		vec4 reflection_accum = vec4(0.0);
+
+		reflection_process(refprobe1_texture, normal, vertex_interp, refprobe1_local_matrix,
+				refprobe1_use_box_project, refprobe1_box_extents, refprobe1_box_offset,
+				refprobe1_exterior, refprobe1_intensity, refprobe1_ambient_mode, refprobe1_ambient_color,
+				roughness, ambient_light, specular_light, reflection_accum, ambient_accum);
+
+#ifdef SECOND_REFLECTION_PROBE
+
+		reflection_process(refprobe2_texture, normal, vertex_interp, refprobe2_local_matrix,
+				refprobe2_use_box_project, refprobe2_box_extents, refprobe2_box_offset,
+				refprobe2_exterior, refprobe2_intensity, refprobe2_ambient_mode, refprobe2_ambient_color,
+				roughness, ambient_light, specular_light, reflection_accum, ambient_accum);
+
+#endif // SECOND_REFLECTION_PROBE
+
+		if (reflection_accum.a > 0.0) {
+			specular_light = reflection_accum.rgb / reflection_accum.a;
+		}
+	}
+#endif // DISABLE_REFLECTION_PROBE
 
 #if defined(CUSTOM_RADIANCE_USED)
 	specular_light = mix(specular_light, custom_radiance.rgb, custom_radiance.a);
@@ -1501,6 +1646,7 @@ void main() {
 	//lightmap overrides everything
 	if (scene_data.use_ambient_light) {
 		ambient_light = scene_data.ambient_light_color_energy.rgb;
+
 #ifdef USE_RADIANCE_MAP
 		if (scene_data.use_ambient_cubemap) {
 			vec3 ambient_dir = scene_data.radiance_inverse_xform * normal;
@@ -1508,7 +1654,13 @@ void main() {
 			cubemap_ambient = srgb_to_linear(cubemap_ambient);
 			ambient_light = mix(ambient_light, cubemap_ambient * scene_data.ambient_light_color_energy.a, scene_data.ambient_color_sky_mix);
 		}
-#endif
+#endif // USE_RADIANCE_MAP
+
+#ifndef DISABLE_REFLECTION_PROBE
+		if (ambient_accum.a > 0.0) {
+			ambient_light = mix(ambient_light, (ambient_accum.rgb / ambient_accum.a) * scene_data.ambient_light_color_energy.a, scene_data.ambient_color_sky_mix);
+		}
+#endif // DISABLE_REFLECTION_PROBE
 	}
 #endif // USE_LIGHTMAP
 
diff --git a/drivers/gles3/storage/light_storage.cpp b/drivers/gles3/storage/light_storage.cpp
index eebba6b00d..f9547502f4 100644
--- a/drivers/gles3/storage/light_storage.cpp
+++ b/drivers/gles3/storage/light_storage.cpp
@@ -423,149 +423,604 @@ void LightStorage::light_instance_mark_visible(RID p_light_instance) {
 /* PROBE API */
 
 RID LightStorage::reflection_probe_allocate() {
-	return RID();
+	return reflection_probe_owner.allocate_rid();
 }
 
 void LightStorage::reflection_probe_initialize(RID p_rid) {
+	ReflectionProbe probe;
+
+	reflection_probe_owner.initialize_rid(p_rid, probe);
 }
 
 void LightStorage::reflection_probe_free(RID p_rid) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_rid);
+	reflection_probe->dependency.deleted_notify(p_rid);
+
+	reflection_probe_owner.free(p_rid);
 }
 
 void LightStorage::reflection_probe_set_update_mode(RID p_probe, RS::ReflectionProbeUpdateMode p_mode) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->update_mode = p_mode;
+	reflection_probe->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_REFLECTION_PROBE);
 }
 
 void LightStorage::reflection_probe_set_intensity(RID p_probe, float p_intensity) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->intensity = p_intensity;
 }
 
 void LightStorage::reflection_probe_set_ambient_mode(RID p_probe, RS::ReflectionProbeAmbientMode p_mode) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->ambient_mode = p_mode;
 }
 
 void LightStorage::reflection_probe_set_ambient_color(RID p_probe, const Color &p_color) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->ambient_color = p_color;
 }
 
 void LightStorage::reflection_probe_set_ambient_energy(RID p_probe, float p_energy) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->ambient_color_energy = p_energy;
 }
 
 void LightStorage::reflection_probe_set_max_distance(RID p_probe, float p_distance) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->max_distance = p_distance;
+	reflection_probe->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_REFLECTION_PROBE);
 }
 
 void LightStorage::reflection_probe_set_size(RID p_probe, const Vector3 &p_size) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->size = p_size;
+	reflection_probe->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_REFLECTION_PROBE);
 }
 
 void LightStorage::reflection_probe_set_origin_offset(RID p_probe, const Vector3 &p_offset) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->origin_offset = p_offset;
+	reflection_probe->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_REFLECTION_PROBE);
 }
 
 void LightStorage::reflection_probe_set_as_interior(RID p_probe, bool p_enable) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->interior = p_enable;
+	reflection_probe->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_REFLECTION_PROBE);
 }
 
 void LightStorage::reflection_probe_set_enable_box_projection(RID p_probe, bool p_enable) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->box_projection = p_enable;
 }
 
 void LightStorage::reflection_probe_set_enable_shadows(RID p_probe, bool p_enable) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->enable_shadows = p_enable;
+	reflection_probe->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_REFLECTION_PROBE);
 }
 
 void LightStorage::reflection_probe_set_cull_mask(RID p_probe, uint32_t p_layers) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->cull_mask = p_layers;
+	reflection_probe->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_REFLECTION_PROBE);
 }
 
 void LightStorage::reflection_probe_set_reflection_mask(RID p_probe, uint32_t p_layers) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->reflection_mask = p_layers;
+	reflection_probe->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_REFLECTION_PROBE);
 }
 
 void LightStorage::reflection_probe_set_resolution(RID p_probe, int p_resolution) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->resolution = p_resolution;
 }
 
 AABB LightStorage::reflection_probe_get_aabb(RID p_probe) const {
-	return AABB();
+	const ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL_V(reflection_probe, AABB());
+
+	AABB aabb;
+	aabb.position = -reflection_probe->size / 2;
+	aabb.size = reflection_probe->size;
+
+	return aabb;
 }
 
 RS::ReflectionProbeUpdateMode LightStorage::reflection_probe_get_update_mode(RID p_probe) const {
-	return RenderingServer::REFLECTION_PROBE_UPDATE_ONCE;
+	const ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL_V(reflection_probe, RenderingServer::REFLECTION_PROBE_UPDATE_ONCE);
+
+	return reflection_probe->update_mode;
 }
 
 uint32_t LightStorage::reflection_probe_get_cull_mask(RID p_probe) const {
-	return 0;
+	const ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL_V(reflection_probe, 0);
+
+	return reflection_probe->cull_mask;
 }
 
 uint32_t LightStorage::reflection_probe_get_reflection_mask(RID p_probe) const {
-	return 0;
+	const ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL_V(reflection_probe, 0);
+
+	return reflection_probe->reflection_mask;
 }
 
 Vector3 LightStorage::reflection_probe_get_size(RID p_probe) const {
-	return Vector3();
+	const ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL_V(reflection_probe, Vector3());
+
+	return reflection_probe->size;
 }
 
 Vector3 LightStorage::reflection_probe_get_origin_offset(RID p_probe) const {
-	return Vector3();
+	const ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL_V(reflection_probe, Vector3());
+
+	return reflection_probe->origin_offset;
 }
 
 float LightStorage::reflection_probe_get_origin_max_distance(RID p_probe) const {
-	return 0.0;
+	const ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL_V(reflection_probe, 0.0);
+
+	return reflection_probe->max_distance;
 }
 
 bool LightStorage::reflection_probe_renders_shadows(RID p_probe) const {
-	return false;
+	const ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL_V(reflection_probe, false);
+
+	return reflection_probe->enable_shadows;
 }
 
 void LightStorage::reflection_probe_set_mesh_lod_threshold(RID p_probe, float p_ratio) {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL(reflection_probe);
+
+	reflection_probe->mesh_lod_threshold = p_ratio;
+	reflection_probe->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_REFLECTION_PROBE);
 }
 
 float LightStorage::reflection_probe_get_mesh_lod_threshold(RID p_probe) const {
-	return 0.0;
+	const ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL_V(reflection_probe, 0.0);
+
+	return reflection_probe->mesh_lod_threshold;
+}
+
+Dependency *LightStorage::reflection_probe_get_dependency(RID p_probe) const {
+	ReflectionProbe *reflection_probe = reflection_probe_owner.get_or_null(p_probe);
+	ERR_FAIL_NULL_V(reflection_probe, nullptr);
+
+	return &reflection_probe->dependency;
 }
 
 /* REFLECTION ATLAS */
 
 RID LightStorage::reflection_atlas_create() {
-	return RID();
+	ReflectionAtlas ra;
+	ra.count = GLOBAL_GET("rendering/reflections/reflection_atlas/reflection_count");
+	ra.size = GLOBAL_GET("rendering/reflections/reflection_atlas/reflection_size");
+
+	return reflection_atlas_owner.make_rid(ra);
 }
 
 void LightStorage::reflection_atlas_free(RID p_ref_atlas) {
+	reflection_atlas_set_size(p_ref_atlas, 0, 0);
+
+	reflection_atlas_owner.free(p_ref_atlas);
 }
 
 int LightStorage::reflection_atlas_get_size(RID p_ref_atlas) const {
-	return 0;
+	ReflectionAtlas *ra = reflection_atlas_owner.get_or_null(p_ref_atlas);
+	ERR_FAIL_NULL_V(ra, 0);
+
+	return ra->size;
 }
 
 void LightStorage::reflection_atlas_set_size(RID p_ref_atlas, int p_reflection_size, int p_reflection_count) {
+	ReflectionAtlas *ra = reflection_atlas_owner.get_or_null(p_ref_atlas);
+	ERR_FAIL_NULL(ra);
+
+	if (ra->size == p_reflection_size && ra->count == p_reflection_count) {
+		return; //no changes
+	}
+
+	ra->size = p_reflection_size;
+	ra->count = p_reflection_count;
+
+	if (ra->depth != 0) {
+		//clear and invalidate everything
+		for (int i = 0; i < ra->reflections.size(); i++) {
+			for (int j = 0; j < 7; j++) {
+				if (ra->reflections[i].fbos[j] != 0) {
+					glDeleteFramebuffers(1, &ra->reflections[i].fbos[j]);
+					ra->reflections.write[i].fbos[j] = 0;
+				}
+			}
+
+			GLES3::Utilities::get_singleton()->texture_free_data(ra->reflections[i].color);
+			ra->reflections.write[i].color = 0;
+
+			GLES3::Utilities::get_singleton()->texture_free_data(ra->reflections[i].radiance);
+			ra->reflections.write[i].radiance = 0;
+
+			if (ra->reflections[i].owner.is_null()) {
+				continue;
+			}
+			reflection_probe_release_atlas_index(ra->reflections[i].owner);
+			//rp->atlasindex clear
+		}
+
+		ra->reflections.clear();
+
+		GLES3::Utilities::get_singleton()->texture_free_data(ra->depth);
+		ra->depth = 0;
+	}
+
+	if (ra->render_buffers.is_valid()) {
+		ra->render_buffers->free_render_buffer_data();
+	}
 }
 
 /* REFLECTION PROBE INSTANCE */
 
 RID LightStorage::reflection_probe_instance_create(RID p_probe) {
-	return RID();
+	ReflectionProbeInstance rpi;
+	rpi.probe = p_probe;
+
+	return reflection_probe_instance_owner.make_rid(rpi);
 }
 
 void LightStorage::reflection_probe_instance_free(RID p_instance) {
+	reflection_probe_release_atlas_index(p_instance);
+	reflection_probe_instance_owner.free(p_instance);
 }
 
 void LightStorage::reflection_probe_instance_set_transform(RID p_instance, const Transform3D &p_transform) {
+	ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+	ERR_FAIL_NULL(rpi);
+
+	rpi->transform = p_transform;
+	rpi->dirty = true;
 }
 
 bool LightStorage::reflection_probe_has_atlas_index(RID p_instance) {
-	return false;
+	ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+	ERR_FAIL_NULL_V(rpi, false);
+
+	if (rpi->atlas.is_null()) {
+		return false;
+	}
+
+	return rpi->atlas_index >= 0;
 }
 
 void LightStorage::reflection_probe_release_atlas_index(RID p_instance) {
+	ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+	ERR_FAIL_NULL(rpi);
+
+	if (rpi->atlas.is_null()) {
+		return; //nothing to release
+	}
+	ReflectionAtlas *atlas = reflection_atlas_owner.get_or_null(rpi->atlas);
+	ERR_FAIL_NULL(atlas);
+
+	ERR_FAIL_INDEX(rpi->atlas_index, atlas->reflections.size());
+	atlas->reflections.write[rpi->atlas_index].owner = RID();
+
+	if (rpi->rendering) {
+		// We were cancelled mid rendering, trigger refresh.
+		rpi->rendering = false;
+		rpi->dirty = true;
+		rpi->processing_layer = 0;
+	}
+
+	rpi->atlas_index = -1;
+	rpi->atlas = RID();
 }
 
 bool LightStorage::reflection_probe_instance_needs_redraw(RID p_instance) {
-	return false;
+	ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+	ERR_FAIL_NULL_V(rpi, false);
+
+	if (rpi->rendering) {
+		return false;
+	}
+
+	if (rpi->dirty) {
+		return true;
+	}
+
+	if (reflection_probe_get_update_mode(rpi->probe) == RS::REFLECTION_PROBE_UPDATE_ALWAYS) {
+		return true;
+	}
+
+	return rpi->atlas_index == -1;
 }
 
 bool LightStorage::reflection_probe_instance_has_reflection(RID p_instance) {
-	return false;
+	ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+	ERR_FAIL_NULL_V(rpi, false);
+
+	return rpi->atlas.is_valid();
 }
 
 bool LightStorage::reflection_probe_instance_begin_render(RID p_instance, RID p_reflection_atlas) {
-	return false;
+	TextureStorage *texture_storage = TextureStorage::get_singleton();
+	ReflectionAtlas *atlas = reflection_atlas_owner.get_or_null(p_reflection_atlas);
+
+	ERR_FAIL_NULL_V(atlas, false);
+
+	ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+	ERR_FAIL_NULL_V(rpi, false);
+
+	if (atlas->render_buffers.is_null()) {
+		atlas->render_buffers.instantiate();
+		atlas->render_buffers->configure_for_probe(Size2i(atlas->size, atlas->size));
+	}
+
+	// First we check if our atlas is initialized.
+
+	// Not making an exception for update_mode = REFLECTION_PROBE_UPDATE_ALWAYS, we are using
+	// the same render techniques regardless of realtime or update once (for now).
+
+	if (atlas->depth == 0) {
+		// We need to create our textures
+		atlas->mipmap_count = Image::get_image_required_mipmaps(atlas->size, atlas->size, Image::FORMAT_RGBAH) - 1;
+		atlas->mipmap_count = MIN(atlas->mipmap_count, 8); // No more than 8 please..
+
+		glActiveTexture(GL_TEXTURE0);
+
+		{
+			// We create one set of 6 layers for depth, we can reuse this when rendering.
+			glGenTextures(1, &atlas->depth);
+			glBindTexture(GL_TEXTURE_2D_ARRAY, atlas->depth);
+
+			glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_DEPTH_COMPONENT24, atlas->size, atlas->size, 6, 0, GL_DEPTH_COMPONENT, GL_UNSIGNED_INT, nullptr);
+
+			GLES3::Utilities::get_singleton()->texture_allocated_data(atlas->depth, atlas->size * atlas->size * 6 * 3, "Reflection probe atlas (depth)");
+		}
+
+		// Make room for our atlas entries
+		atlas->reflections.resize(atlas->count);
+
+		for (int i = 0; i < atlas->count; i++) {
+			// Create a cube map for this atlas entry
+			GLuint color = 0;
+			glGenTextures(1, &color);
+			glBindTexture(GL_TEXTURE_CUBE_MAP, color);
+			atlas->reflections.write[i].color = color;
+
+#ifdef GL_API_ENABLED
+			if (RasterizerGLES3::is_gles_over_gl()) {
+				for (int s = 0; s < 6; s++) {
+					glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + s, 0, GL_RGB10_A2, atlas->size, atlas->size, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr);
+				}
+				glGenerateMipmap(GL_TEXTURE_CUBE_MAP);
+			}
+#endif
+#ifdef GLES_API_ENABLED
+			if (!RasterizerGLES3::is_gles_over_gl()) {
+				glTexStorage2D(GL_TEXTURE_CUBE_MAP, atlas->mipmap_count, GL_RGB10_A2, atlas->size, atlas->size);
+			}
+#endif // GLES_API_ENABLED
+
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_BASE_LEVEL, 0);
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAX_LEVEL, atlas->mipmap_count - 1);
+
+			// Setup sizes and calculate how much memory we're using.
+			int mipmap_size = atlas->size;
+			uint32_t data_size = 0;
+			for (int m = 0; m < atlas->mipmap_count; m++) {
+				atlas->mipmap_size[m] = mipmap_size;
+				data_size += mipmap_size * mipmap_size * 6 * 4;
+				mipmap_size = MAX(mipmap_size >> 1, 1);
+			}
+
+			GLES3::Utilities::get_singleton()->texture_allocated_data(color, data_size, String("Reflection probe atlas (") + String::num_int64(i) + String(", color)"));
+
+			// Create a radiance map for this atlas entry
+			GLuint radiance = 0;
+			glGenTextures(1, &radiance);
+			glBindTexture(GL_TEXTURE_CUBE_MAP, radiance);
+			atlas->reflections.write[i].radiance = radiance;
+
+#ifdef GL_API_ENABLED
+			if (RasterizerGLES3::is_gles_over_gl()) {
+				for (int s = 0; s < 6; s++) {
+					glTexImage2D(GL_TEXTURE_CUBE_MAP_POSITIVE_X + s, 0, GL_RGB10_A2, atlas->size, atlas->size, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, nullptr);
+				}
+				glGenerateMipmap(GL_TEXTURE_CUBE_MAP);
+			}
+#endif
+#ifdef GLES_API_ENABLED
+			if (!RasterizerGLES3::is_gles_over_gl()) {
+				glTexStorage2D(GL_TEXTURE_CUBE_MAP, atlas->mipmap_count, GL_RGB10_A2, atlas->size, atlas->size);
+			}
+#endif // GLES_API_ENABLED
+
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_LINEAR);
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_BASE_LEVEL, 0);
+			glTexParameteri(GL_TEXTURE_CUBE_MAP, GL_TEXTURE_MAX_LEVEL, atlas->mipmap_count - 1);
+
+			// Same data size as our color buffer
+			GLES3::Utilities::get_singleton()->texture_allocated_data(radiance, data_size, String("Reflection probe atlas (") + String::num_int64(i) + String(", radiance)"));
+
+			// Create our framebuffers so we can draw to all sides
+			for (int side = 0; side < 6; side++) {
+				GLuint fbo = 0;
+				glGenFramebuffers(1, &fbo);
+				glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+
+				// We use glFramebufferTexture2D for the color buffer as glFramebufferTextureLayer doesn't always work with cubemaps.
+				glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_CUBE_MAP_POSITIVE_X + side, color, 0);
+				glFramebufferTextureLayer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, atlas->depth, 0, side);
+
+				// Validate framebuffer
+				GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER);
+				if (status != GL_FRAMEBUFFER_COMPLETE) {
+					WARN_PRINT("Could not create reflections framebuffer, status: " + texture_storage->get_framebuffer_error(status));
+				}
+
+				atlas->reflections.write[i].fbos[side] = fbo;
+			}
+
+			// Create an extra framebuffer for building our radiance
+			{
+				GLuint fbo = 0;
+				glGenFramebuffers(1, &fbo);
+				glBindFramebuffer(GL_FRAMEBUFFER, fbo);
+
+				atlas->reflections.write[i].fbos[6] = fbo;
+			}
+		}
+
+		glBindFramebuffer(GL_FRAMEBUFFER, GLES3::TextureStorage::system_fbo);
+		glBindTexture(GL_TEXTURE_CUBE_MAP, 0);
+		glBindTexture(GL_TEXTURE_2D_ARRAY, 0);
+	}
+
+	// Then we find a free slot for our reflection probe
+
+	if (rpi->atlas_index == -1) {
+		for (int i = 0; i < atlas->reflections.size(); i++) {
+			if (atlas->reflections[i].owner.is_null()) {
+				rpi->atlas_index = i;
+				break;
+			}
+		}
+		//find the one used last
+		if (rpi->atlas_index == -1) {
+			//everything is in use, find the one least used via LRU
+			uint64_t pass_min = 0;
+
+			for (int i = 0; i < atlas->reflections.size(); i++) {
+				ReflectionProbeInstance *rpi2 = reflection_probe_instance_owner.get_or_null(atlas->reflections[i].owner);
+				if (rpi2->last_pass < pass_min) {
+					pass_min = rpi2->last_pass;
+					rpi->atlas_index = i;
+				}
+			}
+		}
+	}
+
+	if (rpi->atlas_index != -1) { // should we fail if this is still -1 ?
+		atlas->reflections.write[rpi->atlas_index].owner = p_instance;
+	}
+
+	rpi->atlas = p_reflection_atlas;
+	rpi->rendering = true;
+	rpi->dirty = false;
+	rpi->processing_layer = 0;
+
+	return true;
 }
 
 Ref<RenderSceneBuffers> LightStorage::reflection_probe_atlas_get_render_buffers(RID p_reflection_atlas) {
-	return Ref<RenderSceneBuffers>();
+	ReflectionAtlas *atlas = reflection_atlas_owner.get_or_null(p_reflection_atlas);
+	ERR_FAIL_NULL_V(atlas, Ref<RenderSceneBuffersGLES3>());
+
+	return atlas->render_buffers;
 }
 
 bool LightStorage::reflection_probe_instance_postprocess_step(RID p_instance) {
-	return true;
+	GLES3::CubemapFilter *cubemap_filter = GLES3::CubemapFilter::get_singleton();
+	ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+	ERR_FAIL_NULL_V(rpi, false);
+	ERR_FAIL_COND_V(!rpi->rendering, false);
+	ERR_FAIL_COND_V(rpi->atlas.is_null(), false);
+
+	ReflectionAtlas *atlas = reflection_atlas_owner.get_or_null(rpi->atlas);
+	if (!atlas || rpi->atlas_index == -1) {
+		//does not belong to an atlas anymore, cancel (was removed from atlas or atlas changed while rendering)
+		rpi->rendering = false;
+		rpi->processing_layer = 0;
+		return false;
+	}
+
+	if (LightStorage::get_singleton()->reflection_probe_get_update_mode(rpi->probe) == RS::REFLECTION_PROBE_UPDATE_ALWAYS) {
+		// Using real time reflections, all roughness is done in one step
+		for (int m = 0; m < atlas->mipmap_count; m++) {
+			const GLES3::ReflectionAtlas::Reflection &reflection = atlas->reflections[rpi->atlas_index];
+			cubemap_filter->filter_radiance(reflection.color, reflection.radiance, reflection.fbos[6], atlas->size, atlas->mipmap_count, m);
+		}
+
+		rpi->rendering = false;
+		rpi->processing_layer = 0;
+		return true;
+	} else {
+		const GLES3::ReflectionAtlas::Reflection &reflection = atlas->reflections[rpi->atlas_index];
+		cubemap_filter->filter_radiance(reflection.color, reflection.radiance, reflection.fbos[6], atlas->size, atlas->mipmap_count, rpi->processing_layer);
+
+		rpi->processing_layer++;
+		if (rpi->processing_layer == atlas->mipmap_count) {
+			rpi->rendering = false;
+			rpi->processing_layer = 0;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+GLuint LightStorage::reflection_probe_instance_get_texture(RID p_instance) {
+	ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+	ERR_FAIL_NULL_V(rpi, 0);
+
+	ReflectionAtlas *atlas = reflection_atlas_owner.get_or_null(rpi->atlas);
+	ERR_FAIL_NULL_V(atlas, 0);
+
+	return atlas->reflections[rpi->atlas_index].radiance;
+}
+
+GLuint LightStorage::reflection_probe_instance_get_framebuffer(RID p_instance, int p_index) {
+	ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+	ERR_FAIL_NULL_V(rpi, 0);
+	ERR_FAIL_INDEX_V(p_index, 6, 0);
+
+	ReflectionAtlas *atlas = reflection_atlas_owner.get_or_null(rpi->atlas);
+	ERR_FAIL_NULL_V(atlas, 0);
+	return atlas->reflections[rpi->atlas_index].fbos[p_index];
 }
 
 /* LIGHTMAP CAPTURE */
diff --git a/drivers/gles3/storage/light_storage.h b/drivers/gles3/storage/light_storage.h
index 51c5c48106..b6e64c9492 100644
--- a/drivers/gles3/storage/light_storage.h
+++ b/drivers/gles3/storage/light_storage.h
@@ -34,6 +34,7 @@
 #ifdef GLES3_ENABLED
 
 #include "platform_gl.h"
+#include "render_scene_buffers_gles3.h"
 
 #include "core/templates/local_vector.h"
 #include "core/templates/rid_owner.h"
@@ -126,12 +127,51 @@ struct ReflectionProbe {
 	bool box_projection = false;
 	bool enable_shadows = false;
 	uint32_t cull_mask = (1 << 20) - 1;
+	uint32_t reflection_mask = (1 << 20) - 1;
 	float mesh_lod_threshold = 0.01;
 	float baked_exposure = 1.0;
 
 	Dependency dependency;
 };
 
+/* REFLECTION ATLAS */
+
+struct ReflectionAtlas {
+	int count = 0;
+	int size = 0;
+
+	int mipmap_count = 1; // number of mips, including original
+	int mipmap_size[8];
+	GLuint depth = 0;
+
+	struct Reflection {
+		RID owner;
+		GLuint color = 0;
+		GLuint radiance = 0;
+		GLuint fbos[7];
+	};
+	Vector<Reflection> reflections;
+
+	Ref<RenderSceneBuffersGLES3> render_buffers; // Further render buffers used.
+};
+
+/* REFLECTION PROBE INSTANCE */
+
+struct ReflectionProbeInstance {
+	RID probe;
+	int atlas_index = -1;
+	RID atlas;
+
+	bool dirty = true;
+	bool rendering = false;
+	int processing_layer = 0;
+
+	uint64_t last_pass = 0;
+	uint32_t cull_mask = 0;
+
+	Transform3D transform;
+};
+
 /* LIGHTMAP */
 
 struct Lightmap {
@@ -181,6 +221,13 @@ private:
 	/* REFLECTION PROBE */
 	mutable RID_Owner<ReflectionProbe, true> reflection_probe_owner;
 
+	/* REFLECTION ATLAS */
+	mutable RID_Owner<ReflectionAtlas> reflection_atlas_owner;
+
+	/* REFLECTION PROBE INSTANCE */
+
+	mutable RID_Owner<ReflectionProbeInstance> reflection_probe_instance_owner;
+
 	/* LIGHTMAP */
 
 	Vector<RID> lightmap_textures;
@@ -394,6 +441,29 @@ public:
 	virtual void light_instance_set_shadow_transform(RID p_light_instance, const Projection &p_projection, const Transform3D &p_transform, float p_far, float p_split, int p_pass, float p_shadow_texel_size, float p_bias_scale = 1.0, float p_range_begin = 0, const Vector2 &p_uv_scale = Vector2()) override;
 	virtual void light_instance_mark_visible(RID p_light_instance) override;
 
+	virtual bool light_instance_is_shadow_visible_at_position(RID p_light_instance, const Vector3 &p_position) const override {
+		const LightInstance *light_instance = light_instance_owner.get_or_null(p_light_instance);
+		ERR_FAIL_NULL_V(light_instance, false);
+		const Light *light = light_owner.get_or_null(light_instance->light);
+		ERR_FAIL_NULL_V(light, false);
+
+		if (!light->shadow) {
+			return false;
+		}
+
+		if (!light->distance_fade) {
+			return true;
+		}
+
+		real_t distance = p_position.distance_to(light_instance->transform.origin);
+
+		if (distance > light->distance_fade_shadow + light->distance_fade_length) {
+			return false;
+		}
+
+		return true;
+	}
+
 	_FORCE_INLINE_ RID light_instance_get_base_light(RID p_light_instance) {
 		LightInstance *li = light_instance_owner.get_or_null(p_light_instance);
 		return li->light;
@@ -559,6 +629,9 @@ public:
 
 	/* PROBE API */
 
+	ReflectionProbe *get_reflection_probe(RID p_rid) { return reflection_probe_owner.get_or_null(p_rid); };
+	bool owns_reflection_probe(RID p_rid) { return reflection_probe_owner.owns(p_rid); };
+
 	virtual RID reflection_probe_allocate() override;
 	virtual void reflection_probe_initialize(RID p_rid) override;
 	virtual void reflection_probe_free(RID p_rid) override;
@@ -589,8 +662,12 @@ public:
 	virtual float reflection_probe_get_origin_max_distance(RID p_probe) const override;
 	virtual bool reflection_probe_renders_shadows(RID p_probe) const override;
 
+	Dependency *reflection_probe_get_dependency(RID p_probe) const;
+
 	/* REFLECTION ATLAS */
 
+	bool owns_reflection_atlas(RID p_rid) { return reflection_atlas_owner.owns(p_rid); }
+
 	virtual RID reflection_atlas_create() override;
 	virtual void reflection_atlas_free(RID p_ref_atlas) override;
 	virtual int reflection_atlas_get_size(RID p_ref_atlas) const override;
@@ -598,6 +675,8 @@ public:
 
 	/* REFLECTION PROBE INSTANCE */
 
+	bool owns_reflection_probe_instance(RID p_rid) { return reflection_probe_instance_owner.owns(p_rid); }
+
 	virtual RID reflection_probe_instance_create(RID p_probe) override;
 	virtual void reflection_probe_instance_free(RID p_instance) override;
 	virtual void reflection_probe_instance_set_transform(RID p_instance, const Transform3D &p_transform) override;
@@ -609,6 +688,27 @@ public:
 	virtual Ref<RenderSceneBuffers> reflection_probe_atlas_get_render_buffers(RID p_reflection_atlas) override;
 	virtual bool reflection_probe_instance_postprocess_step(RID p_instance) override;
 
+	_FORCE_INLINE_ RID reflection_probe_instance_get_probe(RID p_instance) {
+		ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+		ERR_FAIL_NULL_V(rpi, RID());
+
+		return rpi->probe;
+	}
+	_FORCE_INLINE_ RID reflection_probe_instance_get_atlas(RID p_instance) {
+		ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+		ERR_FAIL_NULL_V(rpi, RID());
+
+		return rpi->atlas;
+	}
+	Transform3D reflection_probe_instance_get_transform(RID p_instance) {
+		ReflectionProbeInstance *rpi = reflection_probe_instance_owner.get_or_null(p_instance);
+		ERR_FAIL_NULL_V(rpi, Transform3D());
+
+		return rpi->transform;
+	}
+	GLuint reflection_probe_instance_get_texture(RID p_instance);
+	GLuint reflection_probe_instance_get_framebuffer(RID p_instance, int p_index);
+
 	/* LIGHTMAP CAPTURE */
 
 	Lightmap *get_lightmap(RID p_rid) { return lightmap_owner.get_or_null(p_rid); };
diff --git a/drivers/gles3/storage/material_storage.h b/drivers/gles3/storage/material_storage.h
index 59f5682362..02aecf33d6 100644
--- a/drivers/gles3/storage/material_storage.h
+++ b/drivers/gles3/storage/material_storage.h
@@ -43,7 +43,6 @@
 #include "servers/rendering/storage/utilities.h"
 
 #include "drivers/gles3/shaders/canvas.glsl.gen.h"
-#include "drivers/gles3/shaders/cubemap_filter.glsl.gen.h"
 #include "drivers/gles3/shaders/particles.glsl.gen.h"
 #include "drivers/gles3/shaders/scene.glsl.gen.h"
 #include "drivers/gles3/shaders/sky.glsl.gen.h"
@@ -543,7 +542,6 @@ public:
 		CanvasShaderGLES3 canvas_shader;
 		SkyShaderGLES3 sky_shader;
 		SceneShaderGLES3 scene_shader;
-		CubemapFilterShaderGLES3 cubemap_filter_shader;
 		ParticlesShaderGLES3 particles_process_shader;
 
 		ShaderCompiler compiler_canvas;
diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.cpp b/drivers/gles3/storage/render_scene_buffers_gles3.cpp
index de0a64f5fe..6803c92dc9 100644
--- a/drivers/gles3/storage/render_scene_buffers_gles3.cpp
+++ b/drivers/gles3/storage/render_scene_buffers_gles3.cpp
@@ -405,6 +405,13 @@ void RenderSceneBuffersGLES3::_check_render_buffers() {
 	}
 }
 
+void RenderSceneBuffersGLES3::configure_for_probe(Size2i p_size) {
+	internal_size = p_size;
+	target_size = p_size;
+	scaling_3d_mode = RS::VIEWPORT_SCALING_3D_MODE_OFF;
+	view_count = 1;
+}
+
 void RenderSceneBuffersGLES3::_clear_msaa3d_buffers() {
 	for (const FBDEF &cached_fbo : msaa3d.cached_fbos) {
 		GLuint fbo = cached_fbo.fbo;
diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.h b/drivers/gles3/storage/render_scene_buffers_gles3.h
index 8d03d3438d..85a02c860d 100644
--- a/drivers/gles3/storage/render_scene_buffers_gles3.h
+++ b/drivers/gles3/storage/render_scene_buffers_gles3.h
@@ -101,6 +101,7 @@ public:
 	RenderSceneBuffersGLES3();
 	virtual ~RenderSceneBuffersGLES3();
 	virtual void configure(const RenderSceneBuffersConfiguration *p_config) override;
+	void configure_for_probe(Size2i p_size);
 
 	virtual void set_fsr_sharpness(float p_fsr_sharpness) override{};
 	virtual void set_texture_mipmap_bias(float p_texture_mipmap_bias) override{};
diff --git a/drivers/gles3/storage/utilities.cpp b/drivers/gles3/storage/utilities.cpp
index c4fbe098cd..7e2e3dfa2b 100644
--- a/drivers/gles3/storage/utilities.cpp
+++ b/drivers/gles3/storage/utilities.cpp
@@ -158,6 +158,8 @@ RS::InstanceType Utilities::get_base_type(RID p_rid) const {
 		return RS::INSTANCE_LIGHTMAP;
 	} else if (GLES3::ParticlesStorage::get_singleton()->owns_particles(p_rid)) {
 		return RS::INSTANCE_PARTICLES;
+	} else if (GLES3::LightStorage::get_singleton()->owns_reflection_probe(p_rid)) {
+		return RS::INSTANCE_REFLECTION_PROBE;
 	} else if (GLES3::ParticlesStorage::get_singleton()->owns_particles_collision(p_rid)) {
 		return RS::INSTANCE_PARTICLES_COLLISION;
 	} else if (owns_visibility_notifier(p_rid)) {
@@ -197,6 +199,15 @@ bool Utilities::free(RID p_rid) {
 	} else if (GLES3::LightStorage::get_singleton()->owns_lightmap(p_rid)) {
 		GLES3::LightStorage::get_singleton()->lightmap_free(p_rid);
 		return true;
+	} else if (GLES3::LightStorage::get_singleton()->owns_reflection_probe(p_rid)) {
+		GLES3::LightStorage::get_singleton()->reflection_probe_free(p_rid);
+		return true;
+	} else if (GLES3::LightStorage::get_singleton()->owns_reflection_atlas(p_rid)) {
+		GLES3::LightStorage::get_singleton()->reflection_atlas_free(p_rid);
+		return true;
+	} else if (GLES3::LightStorage::get_singleton()->owns_reflection_probe_instance(p_rid)) {
+		GLES3::LightStorage::get_singleton()->reflection_probe_instance_free(p_rid);
+		return true;
 	} else if (GLES3::ParticlesStorage::get_singleton()->owns_particles(p_rid)) {
 		GLES3::ParticlesStorage::get_singleton()->particles_free(p_rid);
 		return true;
@@ -229,6 +240,9 @@ void Utilities::base_update_dependency(RID p_base, DependencyTracker *p_instance
 		if (multimesh->mesh.is_valid()) {
 			base_update_dependency(multimesh->mesh, p_instance);
 		}
+	} else if (LightStorage::get_singleton()->owns_reflection_probe(p_base)) {
+		Dependency *dependency = LightStorage::get_singleton()->reflection_probe_get_dependency(p_base);
+		p_instance->update_dependency(dependency);
 	} else if (LightStorage::get_singleton()->owns_light(p_base)) {
 		Light *l = LightStorage::get_singleton()->get_light(p_base);
 		p_instance->update_dependency(&l->dependency);
diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp
index 597e41fd22..ffeb5ad752 100644
--- a/drivers/unix/os_unix.cpp
+++ b/drivers/unix/os_unix.cpp
@@ -741,7 +741,7 @@ String OS_Unix::get_locale() const {
 	return locale;
 }
 
-Error OS_Unix::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path) {
+Error OS_Unix::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path, bool p_generate_temp_files) {
 	String path = p_path;
 
 	if (FileAccess::exists(path) && path.is_relative_path()) {
diff --git a/drivers/unix/os_unix.h b/drivers/unix/os_unix.h
index abbaf9b8cd..2936c28797 100644
--- a/drivers/unix/os_unix.h
+++ b/drivers/unix/os_unix.h
@@ -55,7 +55,7 @@ public:
 
 	virtual Error get_entropy(uint8_t *r_buffer, int p_bytes) override;
 
-	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr) override;
+	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr, bool p_generate_temp_files = false) override;
 	virtual Error close_dynamic_library(void *p_library_handle) override;
 	virtual Error get_dynamic_library_symbol_handle(void *p_library_handle, const String &p_name, void *&p_symbol_handle, bool p_optional = false) override;
 
diff --git a/editor/debugger/editor_debugger_tree.cpp b/editor/debugger/editor_debugger_tree.cpp
index 17a23982a7..de2e51ee81 100644
--- a/editor/debugger/editor_debugger_tree.cpp
+++ b/editor/debugger/editor_debugger_tree.cpp
@@ -41,7 +41,6 @@
 EditorDebuggerTree::EditorDebuggerTree() {
 	set_v_size_flags(SIZE_EXPAND_FILL);
 	set_allow_rmb_select(true);
-	set_auto_translate_mode(AUTO_TRANSLATE_MODE_DISABLED);
 
 	// Popup
 	item_menu = memnew(PopupMenu);
@@ -57,6 +56,8 @@ EditorDebuggerTree::EditorDebuggerTree() {
 void EditorDebuggerTree::_notification(int p_what) {
 	switch (p_what) {
 		case NOTIFICATION_POSTINITIALIZE: {
+			set_auto_translate_mode(AUTO_TRANSLATE_MODE_DISABLED);
+
 			connect("cell_selected", callable_mp(this, &EditorDebuggerTree::_scene_tree_selected));
 			connect("item_collapsed", callable_mp(this, &EditorDebuggerTree::_scene_tree_folded));
 			connect("item_mouse_selected", callable_mp(this, &EditorDebuggerTree::_scene_tree_rmb_selected));
diff --git a/editor/debugger/editor_file_server.cpp b/editor/debugger/editor_file_server.cpp
index 428b5f4c26..e84eb14636 100644
--- a/editor/debugger/editor_file_server.cpp
+++ b/editor/debugger/editor_file_server.cpp
@@ -201,7 +201,7 @@ void EditorFileServer::poll() {
 	// Scan files to send.
 	_scan_files_changed(EditorFileSystem::get_singleton()->get_filesystem(), tags, files_to_send, cached_files);
 	// Add forced export files
-	Vector<String> forced_export = EditorExportPlatform::get_main_pack_forced_export_files();
+	Vector<String> forced_export = EditorExportPlatform::get_forced_export_files();
 	for (int i = 0; i < forced_export.size(); i++) {
 		_add_custom_file(forced_export[i], files_to_send, cached_files);
 	}
diff --git a/editor/editor_audio_buses.cpp b/editor/editor_audio_buses.cpp
index dd5dc14136..b4e9faa4fd 100644
--- a/editor/editor_audio_buses.cpp
+++ b/editor/editor_audio_buses.cpp
@@ -925,6 +925,7 @@ EditorAudioBus::EditorAudioBus(EditorAudioBuses *p_buses, bool p_is_master) {
 	effects->connect("gui_input", callable_mp(this, &EditorAudioBus::_effects_gui_input));
 
 	send = memnew(OptionButton);
+	send->set_auto_translate_mode(AUTO_TRANSLATE_MODE_DISABLED);
 	send->set_clip_text(true);
 	send->connect("item_selected", callable_mp(this, &EditorAudioBus::_send_selected));
 	vb->add_child(send);
@@ -1151,6 +1152,7 @@ void EditorAudioBuses::_duplicate_bus(int p_which) {
 		ur->add_do_method(AudioServer::get_singleton(), "add_bus_effect", add_at_pos, AudioServer::get_singleton()->get_bus_effect(p_which, i));
 		ur->add_do_method(AudioServer::get_singleton(), "set_bus_effect_enabled", add_at_pos, i, AudioServer::get_singleton()->is_bus_effect_enabled(p_which, i));
 	}
+	ur->add_do_method(this, "_update_bus", add_at_pos);
 	ur->add_undo_method(AudioServer::get_singleton(), "remove_bus", add_at_pos);
 	ur->commit_action();
 }
@@ -1226,7 +1228,10 @@ void EditorAudioBuses::_load_layout() {
 void EditorAudioBuses::_load_default_layout() {
 	String layout_path = GLOBAL_GET("audio/buses/default_bus_layout");
 
-	Ref<AudioBusLayout> state = ResourceLoader::load(layout_path, "", ResourceFormatLoader::CACHE_MODE_IGNORE);
+	Ref<AudioBusLayout> state;
+	if (ResourceLoader::exists(layout_path)) {
+		state = ResourceLoader::load(layout_path, "", ResourceFormatLoader::CACHE_MODE_IGNORE);
+	}
 	if (state.is_null()) {
 		EditorNode::get_singleton()->show_warning(vformat(TTR("There is no '%s' file."), layout_path));
 		return;
diff --git a/editor/editor_build_profile.cpp b/editor/editor_build_profile.cpp
index 67bc55fdd6..c6ecaba230 100644
--- a/editor/editor_build_profile.cpp
+++ b/editor/editor_build_profile.cpp
@@ -861,7 +861,7 @@ EditorBuildProfileManager::EditorBuildProfileManager() {
 	class_list->connect("item_edited", callable_mp(this, &EditorBuildProfileManager::_class_list_item_edited), CONNECT_DEFERRED);
 	class_list->connect("item_collapsed", callable_mp(this, &EditorBuildProfileManager::_class_list_item_collapsed));
 	// It will be displayed once the user creates or chooses a profile.
-	main_vbc->add_margin_child(TTR("Configure Engine Build Profile:"), class_list, true);
+	main_vbc->add_margin_child(TTR("Configure Engine Compilation Profile:"), class_list, true);
 
 	description_bit = memnew(EditorHelpBit);
 	description_bit->set_custom_minimum_size(Size2(0, 80) * EDSCALE);
@@ -875,7 +875,7 @@ EditorBuildProfileManager::EditorBuildProfileManager() {
 	import_profile = memnew(EditorFileDialog);
 	add_child(import_profile);
 	import_profile->set_file_mode(EditorFileDialog::FILE_MODE_OPEN_FILE);
-	import_profile->add_filter("*.build", TTR("Engine Build Profile"));
+	import_profile->add_filter("*.build", TTR("Engine Compilation Profile"));
 	import_profile->connect("files_selected", callable_mp(this, &EditorBuildProfileManager::_import_profile));
 	import_profile->set_title(TTR("Load Profile"));
 	import_profile->set_access(EditorFileDialog::ACCESS_FILESYSTEM);
@@ -883,7 +883,7 @@ EditorBuildProfileManager::EditorBuildProfileManager() {
 	export_profile = memnew(EditorFileDialog);
 	add_child(export_profile);
 	export_profile->set_file_mode(EditorFileDialog::FILE_MODE_SAVE_FILE);
-	export_profile->add_filter("*.build", TTR("Engine Build Profile"));
+	export_profile->add_filter("*.build", TTR("Engine Compilation Profile"));
 	export_profile->connect("file_selected", callable_mp(this, &EditorBuildProfileManager::_export_profile));
 	export_profile->set_title(TTR("Export Profile"));
 	export_profile->set_access(EditorFileDialog::ACCESS_FILESYSTEM);
@@ -892,7 +892,7 @@ EditorBuildProfileManager::EditorBuildProfileManager() {
 	main_vbc->add_margin_child(TTR("Forced Classes on Detect:"), force_detect_classes);
 	force_detect_classes->connect("text_changed", callable_mp(this, &EditorBuildProfileManager::_force_detect_classes_changed));
 
-	set_title(TTR("Edit Build Configuration Profile"));
+	set_title(TTR("Edit Compilation Configuration Profile"));
 
 	singleton = this;
 }
diff --git a/editor/editor_data.cpp b/editor/editor_data.cpp
index 6ab29c1850..bdc6504417 100644
--- a/editor/editor_data.cpp
+++ b/editor/editor_data.cpp
@@ -722,7 +722,7 @@ bool EditorData::check_and_update_scene(int p_idx) {
 
 		new_scene->set_scene_file_path(edited_scene[p_idx].root->get_scene_file_path());
 		Node *old_root = edited_scene[p_idx].root;
-		old_root->replace_by(new_scene, false, false);
+		EditorNode::get_singleton()->set_edited_scene(new_scene);
 		memdelete(old_root);
 		edited_scene.write[p_idx].selection = new_selection;
 
diff --git a/editor/editor_file_system.h b/editor/editor_file_system.h
index e45d6f8be3..782d3eee38 100644
--- a/editor/editor_file_system.h
+++ b/editor/editor_file_system.h
@@ -304,6 +304,7 @@ public:
 	EditorFileSystemDirectory *get_filesystem();
 	bool is_scanning() const;
 	bool is_importing() const { return importing; }
+	bool doing_first_scan() const { return first_scan; }
 	float get_scanning_progress() const;
 	void scan();
 	void scan_changes();
diff --git a/editor/editor_help.cpp b/editor/editor_help.cpp
index bba1939f72..7fded0b807 100644
--- a/editor/editor_help.cpp
+++ b/editor/editor_help.cpp
@@ -96,6 +96,18 @@ const Vector<String> classes_with_csharp_differences = {
 };
 #endif
 
+const Vector<String> packed_array_types = {
+	"PackedByteArray",
+	"PackedColorArray",
+	"PackedFloat32Array",
+	"PackedFloat64Array",
+	"PackedInt32Array",
+	"PackedInt64Array",
+	"PackedStringArray",
+	"PackedVector2Array",
+	"PackedVector3Array",
+};
+
 // TODO: this is sometimes used directly as doc->something, other times as EditorHelp::get_doc_data(), which is thread-safe.
 // Might this be a problem?
 DocTools *EditorHelp::doc = nullptr;
@@ -2193,6 +2205,12 @@ void EditorHelp::_update_doc() {
 				}
 				has_prev_text = true;
 				_add_text(descr);
+				// Add copy note to built-in properties returning Packed*Array.
+				if (!cd.is_script_doc && packed_array_types.has(prop.type)) {
+					class_desc->add_newline();
+					class_desc->add_newline();
+					_add_text(vformat(TTR("[b]Note:[/b] The returned array is [i]copied[/i] and any changes to it will not update the original property value. See [%s] for more details."), prop.type));
+				}
 			} else if (!has_prev_text) {
 				class_desc->add_image(get_editor_theme_icon(SNAME("Error")));
 				class_desc->add_text(" ");
@@ -3173,21 +3191,34 @@ String EditorHelpBit::get_property_description(const StringName &p_class_name, c
 		for (const DocData::PropertyDoc &property : E->value.properties) {
 			String description_current = is_native ? DTR(property.description) : property.description;
 
-			const Vector<String> class_enum = property.enumeration.split(".");
-			const String enum_name = class_enum.size() >= 2 ? class_enum[1] : "";
-			if (!enum_name.is_empty()) {
+			String enum_class_name;
+			String enum_name;
+			if (CoreConstants::is_global_enum(property.enumeration)) {
+				enum_class_name = "@GlobalScope";
+				enum_name = property.enumeration;
+			} else {
+				const int dot_pos = property.enumeration.rfind(".");
+				if (dot_pos >= 0) {
+					enum_class_name = property.enumeration.left(dot_pos);
+					enum_name = property.enumeration.substr(dot_pos + 1);
+				}
+			}
+
+			if (!enum_class_name.is_empty() && !enum_name.is_empty()) {
 				// Classes can use enums from other classes, so check from which it came.
-				const HashMap<String, DocData::ClassDoc>::ConstIterator enum_class = dd->class_list.find(class_enum[0]);
+				const HashMap<String, DocData::ClassDoc>::ConstIterator enum_class = dd->class_list.find(enum_class_name);
 				if (enum_class) {
-					for (DocData::ConstantDoc val : enum_class->value.constants) {
+					const String enum_prefix = EditorPropertyNameProcessor::get_singleton()->process_name(enum_name, EditorPropertyNameProcessor::STYLE_CAPITALIZED) + " ";
+					for (DocData::ConstantDoc constant : enum_class->value.constants) {
 						// Don't display `_MAX` enum value descriptions, as these are never exposed in the inspector.
-						if (val.enumeration == enum_name && !val.name.ends_with("_MAX")) {
-							const String enum_value = EditorPropertyNameProcessor::get_singleton()->process_name(val.name, EditorPropertyNameProcessor::STYLE_CAPITALIZED);
-							const String enum_prefix = EditorPropertyNameProcessor::get_singleton()->process_name(enum_name, EditorPropertyNameProcessor::STYLE_CAPITALIZED) + " ";
-							const String enum_description = is_native ? DTR(val.description) : val.description;
-
-							// Prettify the enum value display, so that "<ENUM NAME>_<VALUE>" becomes "Value".
-							description_current = description_current.trim_prefix("\n") + vformat("\n[b]%s:[/b] %s", enum_value.trim_prefix(enum_prefix), enum_description.is_empty() ? ("[i]" + DTR("No description available.") + "[/i]") : enum_description);
+						if (constant.enumeration == enum_name && !constant.name.ends_with("_MAX")) {
+							// Prettify the enum value display, so that "<ENUM_NAME>_<ITEM>" becomes "Item".
+							const String item_name = EditorPropertyNameProcessor::get_singleton()->process_name(constant.name, EditorPropertyNameProcessor::STYLE_CAPITALIZED).trim_prefix(enum_prefix);
+							String item_descr = (is_native ? DTR(constant.description) : constant.description).strip_edges();
+							if (item_descr.is_empty()) {
+								item_descr = ("[i]" + DTR("No description available.") + "[/i]");
+							}
+							description_current += vformat("\n[b]%s:[/b] %s", item_name, item_descr);
 						}
 					}
 				}
@@ -3544,8 +3575,11 @@ void EditorHelpHighlighter::reset_cache() {
 }
 
 EditorHelpHighlighter::EditorHelpHighlighter() {
+	const Color text_color = EDITOR_GET("text_editor/theme/highlighting/text_color");
+
 #ifdef MODULE_GDSCRIPT_ENABLED
 	TextEdit *gdscript_text_edit = memnew(TextEdit);
+	gdscript_text_edit->add_theme_color_override("font_color", text_color);
 
 	Ref<GDScript> gdscript;
 	gdscript.instantiate();
@@ -3562,6 +3596,7 @@ EditorHelpHighlighter::EditorHelpHighlighter() {
 
 #ifdef MODULE_MONO_ENABLED
 	TextEdit *csharp_text_edit = memnew(TextEdit);
+	csharp_text_edit->add_theme_color_override("font_color", text_color);
 
 	// See GH-89610.
 	//Ref<CSharpScript> csharp;
diff --git a/editor/editor_layouts_dialog.cpp b/editor/editor_layouts_dialog.cpp
index db6fa2c035..52047c569d 100644
--- a/editor/editor_layouts_dialog.cpp
+++ b/editor/editor_layouts_dialog.cpp
@@ -31,8 +31,6 @@
 #include "editor_layouts_dialog.h"
 
 #include "core/io/config_file.h"
-#include "core/object/class_db.h"
-#include "core/os/keyboard.h"
 #include "editor/editor_settings.h"
 #include "editor/themes/editor_scale.h"
 #include "scene/gui/item_list.h"
@@ -60,7 +58,7 @@ void EditorLayoutsDialog::_update_ok_disable_state() {
 	if (layout_names->is_anything_selected()) {
 		get_ok_button()->set_disabled(false);
 	} else {
-		get_ok_button()->set_disabled(!name->is_visible() || name->get_text().is_empty());
+		get_ok_button()->set_disabled(!name->is_visible() || name->get_text().strip_edges().is_empty());
 	}
 }
 
@@ -80,8 +78,8 @@ void EditorLayoutsDialog::ok_pressed() {
 		for (int i = 0; i < selected_items.size(); ++i) {
 			emit_signal(SNAME("name_confirmed"), layout_names->get_item_text(selected_items[i]));
 		}
-	} else if (name->is_visible() && !name->get_text().is_empty()) {
-		emit_signal(SNAME("name_confirmed"), name->get_text());
+	} else if (name->is_visible() && !name->get_text().strip_edges().is_empty()) {
+		emit_signal(SNAME("name_confirmed"), name->get_text().strip_edges());
 	}
 }
 
diff --git a/editor/editor_node.cpp b/editor/editor_node.cpp
index 1c079bfca2..4fb1a86ce2 100644
--- a/editor/editor_node.cpp
+++ b/editor/editor_node.cpp
@@ -2136,10 +2136,6 @@ void EditorNode::_dialog_action(String p_file) {
 
 		} break;
 		case SETTINGS_LAYOUT_DELETE: {
-			if (p_file.is_empty()) {
-				return;
-			}
-
 			Ref<ConfigFile> config;
 			config.instantiate();
 			Error err = config->load(EditorSettings::get_singleton()->get_editor_layouts_config());
@@ -2896,9 +2892,6 @@ void EditorNode::_menu_option_confirm(int p_option, bool p_confirmed) {
 				}
 			}
 		} break;
-		case TOOLS_BUILD_PROFILE_MANAGER: {
-			build_profile_manager->popup_centered_clamped(Size2(700, 800) * EDSCALE, 0.8);
-		} break;
 		case RUN_USER_DATA_FOLDER: {
 			// Ensure_user_data_dir() to prevent the edge case: "Open User Data Folder" won't work after the project was renamed in ProjectSettingsEditor unless the project is saved.
 			OS::get_singleton()->ensure_user_data_dir();
@@ -3203,6 +3196,9 @@ void EditorNode::_tool_menu_option(int p_idx) {
 		case TOOLS_ORPHAN_RESOURCES: {
 			orphan_resources->show();
 		} break;
+		case TOOLS_BUILD_PROFILE_MANAGER: {
+			build_profile_manager->popup_centered_clamped(Size2(700, 800) * EDSCALE, 0.8);
+		} break;
 		case TOOLS_SURFACE_UPGRADE: {
 			surface_upgrade_dialog->popup_on_demand();
 		} break;
@@ -6781,13 +6777,12 @@ EditorNode::EditorNode() {
 #endif
 
 	project_menu->add_separator();
-	project_menu->add_item(TTR("Customize Engine Build Configuration..."), TOOLS_BUILD_PROFILE_MANAGER);
-	project_menu->add_separator();
 
 	tool_menu = memnew(PopupMenu);
 	tool_menu->connect("index_pressed", callable_mp(this, &EditorNode::_tool_menu_option));
 	project_menu->add_submenu_node_item(TTR("Tools"), tool_menu);
 	tool_menu->add_shortcut(ED_SHORTCUT_AND_COMMAND("editor/orphan_resource_explorer", TTR("Orphan Resource Explorer...")), TOOLS_ORPHAN_RESOURCES);
+	tool_menu->add_shortcut(ED_SHORTCUT_AND_COMMAND("editor/engine_compilation_configuration_editor", TTR("Engine Compilation Configuration Editor...")), TOOLS_BUILD_PROFILE_MANAGER);
 	tool_menu->add_shortcut(ED_SHORTCUT_AND_COMMAND("editor/upgrade_mesh_surfaces", TTR("Upgrade Mesh Surfaces...")), TOOLS_SURFACE_UPGRADE);
 
 	project_menu->add_separator();
diff --git a/editor/editor_properties.cpp b/editor/editor_properties.cpp
index 4858fcf78f..ea364d8a0d 100644
--- a/editor/editor_properties.cpp
+++ b/editor/editor_properties.cpp
@@ -2755,7 +2755,8 @@ void EditorPropertyNodePath::_node_assign() {
 	Variant val = get_edited_property_value();
 	Node *n = nullptr;
 	if (val.get_type() == Variant::Type::NODE_PATH) {
-		n = get_base_node()->get_node_or_null(val);
+		Node *base_node = get_base_node();
+		n = base_node == nullptr ? nullptr : base_node->get_node_or_null(val);
 	} else {
 		n = Object::cast_to<Node>(val);
 	}
diff --git a/editor/export/editor_export_platform.cpp b/editor/export/editor_export_platform.cpp
index d2a187bcfd..33fdd7418e 100644
--- a/editor/export/editor_export_platform.cpp
+++ b/editor/export/editor_export_platform.cpp
@@ -864,19 +864,23 @@ String EditorExportPlatform::_get_script_encryption_key(const Ref<EditorExportPr
 	return p_preset->get_script_encryption_key().to_lower();
 }
 
-Vector<String> EditorExportPlatform::get_main_pack_forced_export_files() {
-	// First, get files required by any PCK.
-	Vector<String> files = get_forced_export_files();
+Vector<String> EditorExportPlatform::get_forced_export_files() {
+	Vector<String> files;
+
+	files.push_back(ProjectSettings::get_singleton()->get_global_class_list_path());
 
 	String icon = GLOBAL_GET("application/config/icon");
+	String splash = GLOBAL_GET("application/boot_splash/image");
 	if (!icon.is_empty() && FileAccess::exists(icon)) {
 		files.push_back(icon);
 	}
-
-	String splash = GLOBAL_GET("application/boot_splash/image");
-	if (!splash.is_empty() && icon != splash && FileAccess::exists(splash)) {
+	if (!splash.is_empty() && FileAccess::exists(splash) && icon != splash) {
 		files.push_back(splash);
 	}
+	String resource_cache_file = ResourceUID::get_cache_file();
+	if (FileAccess::exists(resource_cache_file)) {
+		files.push_back(resource_cache_file);
+	}
 
 	String extension_list_config_file = GDExtension::get_extension_list_config_file();
 	if (FileAccess::exists(extension_list_config_file)) {
@@ -909,19 +913,7 @@ Vector<String> EditorExportPlatform::get_main_pack_forced_export_files() {
 	return files;
 }
 
-Vector<String> EditorExportPlatform::get_forced_export_files() {
-	Vector<String> files;
-	files.push_back(ProjectSettings::get_singleton()->get_global_class_list_path());
-
-	String resource_cache_file = ResourceUID::get_cache_file();
-	if (FileAccess::exists(resource_cache_file)) {
-		files.push_back(resource_cache_file);
-	}
-
-	return files;
-}
-
-Error EditorExportPlatform::export_project_files(bool p_main_pack, const Ref<EditorExportPreset> &p_preset, bool p_debug, EditorExportSaveFunction p_func, void *p_udata, EditorExportSaveSharedObject p_so_func) {
+Error EditorExportPlatform::export_project_files(const Ref<EditorExportPreset> &p_preset, bool p_debug, EditorExportSaveFunction p_func, void *p_udata, EditorExportSaveSharedObject p_so_func) {
 	//figure out paths of files that will be exported
 	HashSet<String> paths;
 	Vector<String> path_remaps;
@@ -968,11 +960,9 @@ Error EditorExportPlatform::export_project_files(bool p_main_pack, const Ref<Edi
 		}
 	}
 
-	if (p_main_pack) {
-		// Add native icons to non-resource include list.
-		_edit_filter_list(paths, String("*.icns"), false);
-		_edit_filter_list(paths, String("*.ico"), false);
-	}
+	//add native icons to non-resource include list
+	_edit_filter_list(paths, String("*.icns"), false);
+	_edit_filter_list(paths, String("*.ico"), false);
 
 	_edit_filter_list(paths, p_preset->get_include_filter(), false);
 	_edit_filter_list(paths, p_preset->get_exclude_filter(), true);
@@ -1404,12 +1394,7 @@ Error EditorExportPlatform::export_project_files(bool p_main_pack, const Ref<Edi
 		}
 	}
 
-	Vector<String> forced_export;
-	if (p_main_pack) {
-		forced_export = get_main_pack_forced_export_files();
-	} else {
-		forced_export = get_forced_export_files();
-	}
+	Vector<String> forced_export = get_forced_export_files();
 	for (int i = 0; i < forced_export.size(); i++) {
 		Vector<uint8_t> array = FileAccess::get_file_as_bytes(forced_export[i]);
 		err = p_func(p_udata, forced_export[i], array, idx, total, enc_in_filters, enc_ex_filters, key);
@@ -1418,19 +1403,13 @@ Error EditorExportPlatform::export_project_files(bool p_main_pack, const Ref<Edi
 		}
 	}
 
-	if (p_main_pack) {
-		String config_file = "project.binary";
-		String engine_cfb = EditorPaths::get_singleton()->get_cache_dir().path_join("tmp" + config_file);
-		ProjectSettings::get_singleton()->save_custom(engine_cfb, custom_map, custom_list);
-		Vector<uint8_t> data = FileAccess::get_file_as_bytes(engine_cfb);
-		DirAccess::remove_file_or_error(engine_cfb);
-		err = p_func(p_udata, "res://" + config_file, data, idx, total, enc_in_filters, enc_ex_filters, key);
-		if (err != OK) {
-			return err;
-		}
-	}
+	String config_file = "project.binary";
+	String engine_cfb = EditorPaths::get_singleton()->get_cache_dir().path_join("tmp" + config_file);
+	ProjectSettings::get_singleton()->save_custom(engine_cfb, custom_map, custom_list);
+	Vector<uint8_t> data = FileAccess::get_file_as_bytes(engine_cfb);
+	DirAccess::remove_file_or_error(engine_cfb);
 
-	return OK;
+	return p_func(p_udata, "res://" + config_file, data, idx, total, enc_in_filters, enc_ex_filters, key);
 }
 
 Error EditorExportPlatform::_add_shared_object(void *p_userdata, const SharedObject &p_so) {
@@ -1559,7 +1538,7 @@ void EditorExportPlatform::zip_folder_recursive(zipFile &p_zip, const String &p_
 	da->list_dir_end();
 }
 
-Error EditorExportPlatform::save_pack(bool p_main_pack, const Ref<EditorExportPreset> &p_preset, bool p_debug, const String &p_path, Vector<SharedObject> *p_so_files, bool p_embed, int64_t *r_embedded_start, int64_t *r_embedded_size) {
+Error EditorExportPlatform::save_pack(const Ref<EditorExportPreset> &p_preset, bool p_debug, const String &p_path, Vector<SharedObject> *p_so_files, bool p_embed, int64_t *r_embedded_start, int64_t *r_embedded_size) {
 	EditorProgress ep("savepack", TTR("Packing"), 102, true);
 
 	// Create the temporary export directory if it doesn't exist.
@@ -1578,7 +1557,7 @@ Error EditorExportPlatform::save_pack(bool p_main_pack, const Ref<EditorExportPr
 	pd.f = ftmp;
 	pd.so_files = p_so_files;
 
-	Error err = export_project_files(p_main_pack, p_preset, p_debug, _save_pack_file, &pd, _add_shared_object);
+	Error err = export_project_files(p_preset, p_debug, _save_pack_file, &pd, _add_shared_object);
 
 	// Close temp file.
 	pd.f.unref();
@@ -1796,7 +1775,7 @@ Error EditorExportPlatform::save_zip(const Ref<EditorExportPreset> &p_preset, bo
 	zd.ep = &ep;
 	zd.zip = zip;
 
-	Error err = export_project_files(false, p_preset, p_debug, _save_zip_file, &zd);
+	Error err = export_project_files(p_preset, p_debug, _save_zip_file, &zd);
 	if (err != OK && err != ERR_SKIP) {
 		add_message(EXPORT_MESSAGE_ERROR, TTR("Save ZIP"), TTR("Failed to export project files."));
 	}
@@ -1808,7 +1787,7 @@ Error EditorExportPlatform::save_zip(const Ref<EditorExportPreset> &p_preset, bo
 
 Error EditorExportPlatform::export_pack(const Ref<EditorExportPreset> &p_preset, bool p_debug, const String &p_path, int p_flags) {
 	ExportNotifier notifier(*this, p_preset, p_debug, p_path, p_flags);
-	return save_pack(false, p_preset, p_debug, p_path);
+	return save_pack(p_preset, p_debug, p_path);
 }
 
 Error EditorExportPlatform::export_zip(const Ref<EditorExportPreset> &p_preset, bool p_debug, const String &p_path, int p_flags) {
diff --git a/editor/export/editor_export_platform.h b/editor/export/editor_export_platform.h
index 4c4e64b1a6..26e1f86c8b 100644
--- a/editor/export/editor_export_platform.h
+++ b/editor/export/editor_export_platform.h
@@ -203,7 +203,6 @@ public:
 		return worst_type;
 	}
 
-	static Vector<String> get_main_pack_forced_export_files();
 	static Vector<String> get_forced_export_files();
 
 	virtual bool fill_log_messages(RichTextLabel *p_log, Error p_err);
@@ -217,9 +216,9 @@ public:
 	virtual String get_name() const = 0;
 	virtual Ref<Texture2D> get_logo() const = 0;
 
-	Error export_project_files(bool p_main_pack, const Ref<EditorExportPreset> &p_preset, bool p_debug, EditorExportSaveFunction p_func, void *p_udata, EditorExportSaveSharedObject p_so_func = nullptr);
+	Error export_project_files(const Ref<EditorExportPreset> &p_preset, bool p_debug, EditorExportSaveFunction p_func, void *p_udata, EditorExportSaveSharedObject p_so_func = nullptr);
 
-	Error save_pack(bool p_main_pack, const Ref<EditorExportPreset> &p_preset, bool p_debug, const String &p_path, Vector<SharedObject> *p_so_files = nullptr, bool p_embed = false, int64_t *r_embedded_start = nullptr, int64_t *r_embedded_size = nullptr);
+	Error save_pack(const Ref<EditorExportPreset> &p_preset, bool p_debug, const String &p_path, Vector<SharedObject> *p_so_files = nullptr, bool p_embed = false, int64_t *r_embedded_start = nullptr, int64_t *r_embedded_size = nullptr);
 	Error save_zip(const Ref<EditorExportPreset> &p_preset, bool p_debug, const String &p_path);
 
 	virtual bool poll_export() { return false; }
diff --git a/editor/export/editor_export_platform_pc.cpp b/editor/export/editor_export_platform_pc.cpp
index 7cd8cb9b29..cdaf18b346 100644
--- a/editor/export/editor_export_platform_pc.cpp
+++ b/editor/export/editor_export_platform_pc.cpp
@@ -151,15 +151,28 @@ Error EditorExportPlatformPC::prepare_template(const Ref<EditorExportPreset> &p_
 		return ERR_FILE_NOT_FOUND;
 	}
 
-	String wrapper_template_path = template_path.get_basename() + "_console.exe";
+	// Matching the extensions in platform/windows/console_wrapper_windows.cpp
+	static const char *const wrapper_extensions[] = {
+		".console.exe",
+		"_console.exe",
+		" console.exe",
+		"console.exe",
+		nullptr,
+	};
 	int con_wrapper_mode = p_preset->get("debug/export_console_wrapper");
 	bool copy_wrapper = (con_wrapper_mode == 1 && p_debug) || (con_wrapper_mode == 2);
 
 	Ref<DirAccess> da = DirAccess::create(DirAccess::ACCESS_FILESYSTEM);
 	da->make_dir_recursive(p_path.get_base_dir());
 	Error err = da->copy(template_path, p_path, get_chmod_flags());
-	if (err == OK && copy_wrapper && FileAccess::exists(wrapper_template_path)) {
-		err = da->copy(wrapper_template_path, p_path.get_basename() + ".console.exe", get_chmod_flags());
+	if (err == OK && copy_wrapper) {
+		for (int i = 0; wrapper_extensions[i]; ++i) {
+			const String wrapper_path = template_path.get_basename() + wrapper_extensions[i];
+			if (FileAccess::exists(wrapper_path)) {
+				err = da->copy(wrapper_path, p_path.get_basename() + ".console.exe", get_chmod_flags());
+				break;
+			}
+		}
 	}
 	if (err != OK) {
 		add_message(EXPORT_MESSAGE_ERROR, TTR("Prepare Template"), TTR("Failed to copy export template."));
@@ -181,7 +194,7 @@ Error EditorExportPlatformPC::export_project_data(const Ref<EditorExportPreset>
 
 	int64_t embedded_pos;
 	int64_t embedded_size;
-	Error err = save_pack(true, p_preset, p_debug, pck_path, &so_files, p_preset->get("binary_format/embed_pck"), &embedded_pos, &embedded_size);
+	Error err = save_pack(p_preset, p_debug, pck_path, &so_files, p_preset->get("binary_format/embed_pck"), &embedded_pos, &embedded_size);
 	if (err == OK && p_preset->get("binary_format/embed_pck")) {
 		if (embedded_size >= 0x100000000 && String(p_preset->get("binary_format/architecture")).contains("32")) {
 			add_message(EXPORT_MESSAGE_ERROR, TTR("PCK Embedding"), TTR("On 32-bit exports the embedded PCK cannot be bigger than 4 GiB."));
diff --git a/editor/filesystem_dock.cpp b/editor/filesystem_dock.cpp
index 233f20a8b3..3b6ce8d396 100644
--- a/editor/filesystem_dock.cpp
+++ b/editor/filesystem_dock.cpp
@@ -2250,6 +2250,8 @@ void FileSystemDock::_file_option(int p_option, const Vector<String> &p_selected
 					terminal_emulator_args.push_back("--working-directory");
 				} else if (chosen_terminal_emulator.ends_with("urxvt")) {
 					terminal_emulator_args.push_back("-cd");
+				} else if (chosen_terminal_emulator.ends_with("xfce4-terminal")) {
+					terminal_emulator_args.push_back("--working-directory");
 				}
 			}
 #endif
@@ -2529,6 +2531,14 @@ void FileSystemDock::_file_option(int p_option, const Vector<String> &p_selected
 			}
 		} break;
 
+		case FILE_COPY_ABSOLUTE_PATH: {
+			if (!p_selected.is_empty()) {
+				const String &fpath = p_selected[0];
+				const String absolute_path = ProjectSettings::get_singleton()->globalize_path(fpath);
+				DisplayServer::get_singleton()->clipboard_set(absolute_path);
+			}
+		} break;
+
 		case FILE_COPY_UID: {
 			if (!p_selected.is_empty()) {
 				ResourceUID::ID uid = ResourceLoader::get_resource_uid(p_selected[0]);
@@ -3191,6 +3201,7 @@ void FileSystemDock::_file_and_folders_fill_popup(PopupMenu *p_popup, const Vect
 
 	if (p_paths.size() == 1) {
 		p_popup->add_icon_shortcut(get_editor_theme_icon(SNAME("ActionCopy")), ED_GET_SHORTCUT("filesystem_dock/copy_path"), FILE_COPY_PATH);
+		p_popup->add_shortcut(ED_GET_SHORTCUT("filesystem_dock/copy_absolute_path"), FILE_COPY_ABSOLUTE_PATH);
 		if (ResourceLoader::get_resource_uid(p_paths[0]) != ResourceUID::INVALID_ID) {
 			p_popup->add_icon_shortcut(get_editor_theme_icon(SNAME("Instance")), ED_GET_SHORTCUT("filesystem_dock/copy_uid"), FILE_COPY_UID);
 		}
@@ -3479,6 +3490,8 @@ void FileSystemDock::_tree_gui_input(Ref<InputEvent> p_event) {
 			_tree_rmb_option(FILE_DUPLICATE);
 		} else if (ED_IS_SHORTCUT("filesystem_dock/copy_path", p_event)) {
 			_tree_rmb_option(FILE_COPY_PATH);
+		} else if (ED_IS_SHORTCUT("filesystem_dock/copy_absolute_path", p_event)) {
+			_tree_rmb_option(FILE_COPY_ABSOLUTE_PATH);
 		} else if (ED_IS_SHORTCUT("filesystem_dock/copy_uid", p_event)) {
 			_tree_rmb_option(FILE_COPY_UID);
 		} else if (ED_IS_SHORTCUT("filesystem_dock/delete", p_event)) {
@@ -3551,6 +3564,8 @@ void FileSystemDock::_file_list_gui_input(Ref<InputEvent> p_event) {
 			_file_list_rmb_option(FILE_DUPLICATE);
 		} else if (ED_IS_SHORTCUT("filesystem_dock/copy_path", p_event)) {
 			_file_list_rmb_option(FILE_COPY_PATH);
+		} else if (ED_IS_SHORTCUT("filesystem_dock/copy_absolute_path", p_event)) {
+			_file_list_rmb_option(FILE_COPY_ABSOLUTE_PATH);
 		} else if (ED_IS_SHORTCUT("filesystem_dock/delete", p_event)) {
 			_file_list_rmb_option(FILE_REMOVE);
 		} else if (ED_IS_SHORTCUT("filesystem_dock/rename", p_event)) {
@@ -3854,6 +3869,7 @@ FileSystemDock::FileSystemDock() {
 
 	// `KeyModifierMask::CMD_OR_CTRL | Key::C` conflicts with other editor shortcuts.
 	ED_SHORTCUT("filesystem_dock/copy_path", TTR("Copy Path"), KeyModifierMask::CMD_OR_CTRL | KeyModifierMask::SHIFT | Key::C);
+	ED_SHORTCUT("filesystem_dock/copy_absolute_path", TTR("Copy Absolute Path"));
 	ED_SHORTCUT("filesystem_dock/copy_uid", TTR("Copy UID"));
 	ED_SHORTCUT("filesystem_dock/duplicate", TTR("Duplicate..."), KeyModifierMask::CMD_OR_CTRL | Key::D);
 	ED_SHORTCUT("filesystem_dock/delete", TTR("Delete"), Key::KEY_DELETE);
diff --git a/editor/filesystem_dock.h b/editor/filesystem_dock.h
index b950075928..c9fe3a8baf 100644
--- a/editor/filesystem_dock.h
+++ b/editor/filesystem_dock.h
@@ -129,6 +129,7 @@ private:
 		FILE_OPEN_EXTERNAL,
 		FILE_OPEN_IN_TERMINAL,
 		FILE_COPY_PATH,
+		FILE_COPY_ABSOLUTE_PATH,
 		FILE_COPY_UID,
 		FOLDER_EXPAND_ALL,
 		FOLDER_COLLAPSE_ALL,
diff --git a/editor/groups_editor.cpp b/editor/groups_editor.cpp
index bec13b710d..c4e38b327a 100644
--- a/editor/groups_editor.cpp
+++ b/editor/groups_editor.cpp
@@ -157,10 +157,14 @@ void GroupsEditor::_update_groups() {
 
 	_load_scene_groups(scene_root_node);
 
-	for (const KeyValue<StringName, bool> &E : scene_groups) {
-		if (global_groups.has(E.key)) {
-			scene_groups.erase(E.key);
+	for (HashMap<StringName, bool>::Iterator E = scene_groups.begin(); E;) {
+		HashMap<StringName, bool>::Iterator next = E;
+		++next;
+
+		if (global_groups.has(E->key)) {
+			scene_groups.erase(E->key);
 		}
+		E = next;
 	}
 
 	updating_groups = false;
diff --git a/editor/gui/scene_tree_editor.h b/editor/gui/scene_tree_editor.h
index b4d9644f16..9ae1e99a27 100644
--- a/editor/gui/scene_tree_editor.h
+++ b/editor/gui/scene_tree_editor.h
@@ -160,6 +160,7 @@ public:
 
 	void set_marked(const HashSet<Node *> &p_marked, bool p_selectable = true, bool p_children_selectable = true);
 	void set_marked(Node *p_marked, bool p_selectable = true, bool p_children_selectable = true);
+	bool has_marked() const { return !marked.is_empty(); }
 	void set_selected(Node *p_node, bool p_emit_selected = true);
 	Node *get_selected();
 	void set_can_rename(bool p_can_rename) { can_rename = p_can_rename; }
diff --git a/editor/import/resource_importer_csv_translation.cpp b/editor/import/resource_importer_csv_translation.cpp
index d56b426c86..d2705ac98a 100644
--- a/editor/import/resource_importer_csv_translation.cpp
+++ b/editor/import/resource_importer_csv_translation.cpp
@@ -122,11 +122,12 @@ Error ResourceImporterCSVTranslation::import(const String &p_source_file, const
 		if (!key.is_empty()) {
 			ERR_CONTINUE_MSG(line.size() != locales.size() + (int)skipped_locales.size() + 1, vformat("Error importing CSV translation: expected %d locale(s), but the '%s' key has %d locale(s).", locales.size(), key, line.size() - 1));
 
+			int write_index = 0; // Keep track of translations written in case some locales are skipped.
 			for (int i = 1; i < line.size(); i++) {
 				if (skipped_locales.has(i)) {
 					continue;
 				}
-				translations.write[i - 1]->add_message(key, line[i].c_unescape());
+				translations.write[write_index++]->add_message(key, line[i].c_unescape());
 			}
 		}
 	} while (!f->eof_reached());
diff --git a/editor/plugins/node_3d_editor_plugin.cpp b/editor/plugins/node_3d_editor_plugin.cpp
index 7e271bb68e..88250ea3c3 100644
--- a/editor/plugins/node_3d_editor_plugin.cpp
+++ b/editor/plugins/node_3d_editor_plugin.cpp
@@ -4519,7 +4519,7 @@ bool Node3DEditorViewport::can_drop_data_fw(const Point2 &p_point, const Variant
 						Ref<BaseMaterial3D> base_mat = res;
 						Ref<ShaderMaterial> shader_mat = res;
 
-						if (base_mat.is_null() && !shader_mat.is_null()) {
+						if (base_mat.is_null() && shader_mat.is_null()) {
 							continue;
 						}
 
@@ -6639,8 +6639,13 @@ void fragment() {
 
 			for (int j = 0; j < 4; j++) {
 				Transform3D t = Transform3D();
-				t = t.scaled(axis * distances[j + 1]);
-				t = t.translated(axis * distances[j]);
+				if (distances[j] > 0.0) {
+					t = t.scaled(axis * distances[j + 1]);
+					t = t.translated(axis * distances[j]);
+				} else {
+					t = t.scaled(axis * distances[j]);
+					t = t.translated(axis * distances[j + 1]);
+				}
 				RenderingServer::get_singleton()->multimesh_instance_set_transform(origin_multimesh, i * 4 + j, t);
 				RenderingServer::get_singleton()->multimesh_instance_set_color(origin_multimesh, i * 4 + j, origin_color);
 			}
diff --git a/editor/plugins/path_2d_editor_plugin.cpp b/editor/plugins/path_2d_editor_plugin.cpp
index cc9114db39..4fff3bfb8a 100644
--- a/editor/plugins/path_2d_editor_plugin.cpp
+++ b/editor/plugins/path_2d_editor_plugin.cpp
@@ -225,17 +225,20 @@ bool Path2DEditor::forward_gui_input(const Ref<InputEvent> &p_event) {
 					break;
 
 				case ACTION_MOVING_POINT:
-				case ACTION_MOVING_NEW_POINT: {
 					if (original_mouse_pos != gpoint) {
-						if (action == ACTION_MOVING_POINT) {
-							undo_redo->create_action(TTR("Move Point in Curve"));
-							undo_redo->add_undo_method(curve.ptr(), "set_point_position", action_point, moving_from);
-						}
+						undo_redo->create_action(TTR("Move Point in Curve"));
+						undo_redo->add_undo_method(curve.ptr(), "set_point_position", action_point, moving_from);
 						undo_redo->add_do_method(curve.ptr(), "set_point_position", action_point, cpoint);
 						undo_redo->add_do_method(canvas_item_editor, "update_viewport");
 						undo_redo->add_undo_method(canvas_item_editor, "update_viewport");
 						undo_redo->commit_action(false);
 					}
+					break;
+				case ACTION_MOVING_NEW_POINT: {
+					undo_redo->add_do_method(curve.ptr(), "set_point_position", action_point, cpoint);
+					undo_redo->add_undo_method(canvas_item_editor, "update_viewport");
+					undo_redo->add_do_method(canvas_item_editor, "update_viewport");
+					undo_redo->commit_action(false);
 				} break;
 
 				case ACTION_MOVING_IN: {
diff --git a/editor/plugins/texture_region_editor_plugin.cpp b/editor/plugins/texture_region_editor_plugin.cpp
index 6c7a3f776e..9498e980ec 100644
--- a/editor/plugins/texture_region_editor_plugin.cpp
+++ b/editor/plugins/texture_region_editor_plugin.cpp
@@ -37,13 +37,16 @@
 #include "editor/editor_string_names.h"
 #include "editor/editor_undo_redo_manager.h"
 #include "editor/themes/editor_scale.h"
-#include "scene/gui/check_box.h"
+#include "scene/2d/sprite_2d.h"
+#include "scene/3d/sprite_3d.h"
+#include "scene/gui/nine_patch_rect.h"
 #include "scene/gui/option_button.h"
 #include "scene/gui/panel_container.h"
 #include "scene/gui/separator.h"
 #include "scene/gui/spin_box.h"
 #include "scene/gui/view_panner.h"
 #include "scene/resources/atlas_texture.h"
+#include "scene/resources/style_box_texture.h"
 
 Transform2D TextureRegionEditor::_get_offset_transform() const {
 	Transform2D mtx;
@@ -94,7 +97,7 @@ void TextureRegionEditor::_texture_overlay_draw() {
 					last_cell = cell;
 				}
 			} else {
-				for (int i = 0; i < s.width; i++) {
+				for (int i = 0; i < s.width + snap_separation.x; i++) {
 					int cell = Math::fast_ftoi(Math::floor((mtx.affine_inverse().xform(Vector2(i, 0)).x - snap_offset.x) / (snap_step.x + snap_separation.x)));
 					if (i == 0) {
 						last_cell = cell;
@@ -120,7 +123,7 @@ void TextureRegionEditor::_texture_overlay_draw() {
 					last_cell = cell;
 				}
 			} else {
-				for (int i = 0; i < s.height; i++) {
+				for (int i = 0; i < s.height + snap_separation.y; i++) {
 					int cell = Math::fast_ftoi(Math::floor((mtx.affine_inverse().xform(Vector2(0, i)).y - snap_offset.y) / (snap_step.y + snap_separation.y)));
 					if (i == 0) {
 						last_cell = cell;
@@ -281,6 +284,17 @@ void TextureRegionEditor::_draw_margin_line(Vector2 p_from, Vector2 p_to) {
 	}
 }
 
+void TextureRegionEditor::_set_grid_parameters_clamping(bool p_enabled) {
+	sb_off_x->set_allow_lesser(!p_enabled);
+	sb_off_x->set_allow_greater(!p_enabled);
+	sb_off_y->set_allow_lesser(!p_enabled);
+	sb_off_y->set_allow_greater(!p_enabled);
+	sb_step_x->set_allow_greater(!p_enabled);
+	sb_step_y->set_allow_greater(!p_enabled);
+	sb_sep_x->set_allow_greater(!p_enabled);
+	sb_sep_y->set_allow_greater(!p_enabled);
+}
+
 void TextureRegionEditor::_texture_overlay_input(const Ref<InputEvent> &p_input) {
 	if (panner->gui_input(p_input)) {
 		return;
@@ -842,6 +856,7 @@ void TextureRegionEditor::_notification(int p_what) {
 			}
 
 			if (!is_visible()) {
+				EditorSettings::get_singleton()->set_project_metadata("texture_region_editor", "snap_offset", snap_offset);
 				EditorSettings::get_singleton()->set_project_metadata("texture_region_editor", "snap_step", snap_step);
 				EditorSettings::get_singleton()->set_project_metadata("texture_region_editor", "snap_separation", snap_separation);
 				EditorSettings::get_singleton()->set_project_metadata("texture_region_editor", "snap_mode", snap_mode);
@@ -973,6 +988,7 @@ void TextureRegionEditor::_texture_changed() {
 void TextureRegionEditor::_edit_region() {
 	const Ref<Texture2D> object_texture = _get_edited_object_texture();
 	if (object_texture.is_null()) {
+		_set_grid_parameters_clamping(false);
 		_zoom_reset();
 		hscroll->hide();
 		vscroll->hide();
@@ -1057,6 +1073,26 @@ void TextureRegionEditor::_edit_region() {
 		}
 	}
 
+	// Avoiding clamping with mismatched min/max.
+	_set_grid_parameters_clamping(false);
+	const Size2 tex_size = object_texture->get_size();
+	sb_off_x->set_min(-tex_size.x);
+	sb_off_x->set_max(tex_size.x);
+	sb_off_y->set_min(-tex_size.y);
+	sb_off_y->set_max(tex_size.y);
+	sb_step_x->set_max(tex_size.x);
+	sb_step_y->set_max(tex_size.y);
+	sb_sep_x->set_max(tex_size.x);
+	sb_sep_y->set_max(tex_size.y);
+
+	_set_grid_parameters_clamping(true);
+	sb_off_x->set_value(snap_offset.x);
+	sb_off_y->set_value(snap_offset.y);
+	sb_step_x->set_value(snap_step.x);
+	sb_step_y->set_value(snap_step.y);
+	sb_sep_x->set_value(snap_separation.x);
+	sb_sep_y->set_value(snap_separation.y);
+
 	_update_rect();
 	texture_preview->queue_redraw();
 	texture_overlay->queue_redraw();
@@ -1080,6 +1116,7 @@ TextureRegionEditor::TextureRegionEditor() {
 	set_ok_button_text(TTR("Close"));
 
 	// A power-of-two value works better as a default grid size.
+	snap_offset = EditorSettings::get_singleton()->get_project_metadata("texture_region_editor", "snap_offset", Vector2());
 	snap_step = EditorSettings::get_singleton()->get_project_metadata("texture_region_editor", "snap_step", Vector2(8, 8));
 	snap_separation = EditorSettings::get_singleton()->get_project_metadata("texture_region_editor", "snap_separation", Vector2());
 	snap_mode = (SnapMode)(int)EditorSettings::get_singleton()->get_project_metadata("texture_region_editor", "snap_mode", SNAP_NONE);
@@ -1110,19 +1147,13 @@ TextureRegionEditor::TextureRegionEditor() {
 	hb_grid->add_child(memnew(Label(TTR("Offset:"))));
 
 	sb_off_x = memnew(SpinBox);
-	sb_off_x->set_min(-256);
-	sb_off_x->set_max(256);
 	sb_off_x->set_step(1);
-	sb_off_x->set_value(snap_offset.x);
 	sb_off_x->set_suffix("px");
 	sb_off_x->connect("value_changed", callable_mp(this, &TextureRegionEditor::_set_snap_off_x));
 	hb_grid->add_child(sb_off_x);
 
 	sb_off_y = memnew(SpinBox);
-	sb_off_y->set_min(-256);
-	sb_off_y->set_max(256);
 	sb_off_y->set_step(1);
-	sb_off_y->set_value(snap_offset.y);
 	sb_off_y->set_suffix("px");
 	sb_off_y->connect("value_changed", callable_mp(this, &TextureRegionEditor::_set_snap_off_y));
 	hb_grid->add_child(sb_off_y);
@@ -1131,19 +1162,15 @@ TextureRegionEditor::TextureRegionEditor() {
 	hb_grid->add_child(memnew(Label(TTR("Step:"))));
 
 	sb_step_x = memnew(SpinBox);
-	sb_step_x->set_min(-256);
-	sb_step_x->set_max(256);
+	sb_step_x->set_min(0);
 	sb_step_x->set_step(1);
-	sb_step_x->set_value(snap_step.x);
 	sb_step_x->set_suffix("px");
 	sb_step_x->connect("value_changed", callable_mp(this, &TextureRegionEditor::_set_snap_step_x));
 	hb_grid->add_child(sb_step_x);
 
 	sb_step_y = memnew(SpinBox);
-	sb_step_y->set_min(-256);
-	sb_step_y->set_max(256);
+	sb_step_y->set_min(0);
 	sb_step_y->set_step(1);
-	sb_step_y->set_value(snap_step.y);
 	sb_step_y->set_suffix("px");
 	sb_step_y->connect("value_changed", callable_mp(this, &TextureRegionEditor::_set_snap_step_y));
 	hb_grid->add_child(sb_step_y);
@@ -1153,24 +1180,29 @@ TextureRegionEditor::TextureRegionEditor() {
 
 	sb_sep_x = memnew(SpinBox);
 	sb_sep_x->set_min(0);
-	sb_sep_x->set_max(256);
 	sb_sep_x->set_step(1);
-	sb_sep_x->set_value(snap_separation.x);
 	sb_sep_x->set_suffix("px");
 	sb_sep_x->connect("value_changed", callable_mp(this, &TextureRegionEditor::_set_snap_sep_x));
 	hb_grid->add_child(sb_sep_x);
 
 	sb_sep_y = memnew(SpinBox);
 	sb_sep_y->set_min(0);
-	sb_sep_y->set_max(256);
 	sb_sep_y->set_step(1);
-	sb_sep_y->set_value(snap_separation.y);
 	sb_sep_y->set_suffix("px");
 	sb_sep_y->connect("value_changed", callable_mp(this, &TextureRegionEditor::_set_snap_sep_y));
 	hb_grid->add_child(sb_sep_y);
 
 	hb_grid->hide();
 
+	// Restore grid snap parameters.
+	_set_grid_parameters_clamping(false);
+	sb_off_x->set_value(snap_offset.x);
+	sb_off_y->set_value(snap_offset.y);
+	sb_step_x->set_value(snap_step.x);
+	sb_step_y->set_value(snap_step.y);
+	sb_sep_x->set_value(snap_separation.x);
+	sb_sep_y->set_value(snap_separation.y);
+
 	// Default the zoom to match the editor scale, but don't dezoom on editor scales below 100% to prevent pixel art from looking bad.
 	draw_zoom = MAX(1.0f, EDSCALE);
 	max_draw_zoom = 128.0f * MAX(1.0f, EDSCALE);
diff --git a/editor/plugins/texture_region_editor_plugin.h b/editor/plugins/texture_region_editor_plugin.h
index fb2b547bed..59a1b56c19 100644
--- a/editor/plugins/texture_region_editor_plugin.h
+++ b/editor/plugins/texture_region_editor_plugin.h
@@ -31,18 +31,17 @@
 #ifndef TEXTURE_REGION_EDITOR_PLUGIN_H
 #define TEXTURE_REGION_EDITOR_PLUGIN_H
 
-#include "canvas_item_editor_plugin.h"
 #include "editor/editor_inspector.h"
 #include "editor/editor_plugin.h"
-#include "scene/2d/sprite_2d.h"
-#include "scene/3d/sprite_3d.h"
 #include "scene/gui/dialogs.h"
-#include "scene/gui/nine_patch_rect.h"
-#include "scene/resources/style_box_texture.h"
 
 class AtlasTexture;
+class NinePatchRect;
 class OptionButton;
 class PanelContainer;
+class Sprite2D;
+class Sprite3D;
+class StyleBoxTexture;
 class ViewPanner;
 
 class TextureRegionEditor : public AcceptDialog {
@@ -138,6 +137,8 @@ class TextureRegionEditor : public AcceptDialog {
 
 	void _draw_margin_line(Vector2 p_from, Vector2 p_to);
 
+	void _set_grid_parameters_clamping(bool p_enabled);
+
 protected:
 	void _notification(int p_what);
 	static void _bind_methods();
diff --git a/editor/plugins/visual_shader_editor_plugin.cpp b/editor/plugins/visual_shader_editor_plugin.cpp
index 71af1dadd9..05b8b81fa7 100644
--- a/editor/plugins/visual_shader_editor_plugin.cpp
+++ b/editor/plugins/visual_shader_editor_plugin.cpp
@@ -7183,8 +7183,35 @@ public:
 		undo_redo->add_do_property(node.ptr(), p_property, p_value);
 		undo_redo->add_undo_property(node.ptr(), p_property, node->get(p_property));
 
+		Ref<VisualShaderNode> vsnode = editor->get_visual_shader()->get_node(shader_type, node_id);
+		ERR_FAIL_COND(vsnode.is_null());
+
+		// Check for invalid connections due to removed ports.
+		// We need to know the new state of the node to generate the proper undo/redo instructions.
+		// Quite hacky but the best way I could come up with for now.
+		Ref<VisualShaderNode> vsnode_new = vsnode->duplicate();
+		vsnode_new->set(p_property, p_value);
+		const int input_port_count = vsnode_new->get_input_port_count();
+		const int output_port_count = vsnode_new->get_output_port_count();
+
+		List<VisualShader::Connection> conns;
+		editor->get_visual_shader()->get_node_connections(shader_type, &conns);
+		VisualShaderGraphPlugin *graph_plugin = editor->get_graph_plugin();
+		bool undo_node_already_updated = false;
+		for (const VisualShader::Connection &c : conns) {
+			if ((c.from_node == node_id && c.from_port >= output_port_count) || (c.to_node == node_id && c.to_port >= input_port_count)) {
+				undo_redo->add_do_method(editor->get_visual_shader().ptr(), "disconnect_nodes", shader_type, c.from_node, c.from_port, c.to_node, c.to_port);
+				undo_redo->add_do_method(graph_plugin, "disconnect_nodes", shader_type, c.from_node, c.from_port, c.to_node, c.to_port);
+				// We need to update the node before reconnecting to avoid accessing a non-existing port.
+				undo_redo->add_undo_method(graph_plugin, "update_node_deferred", shader_type, node_id);
+				undo_node_already_updated = true;
+				undo_redo->add_undo_method(editor->get_visual_shader().ptr(), "connect_nodes", shader_type, c.from_node, c.from_port, c.to_node, c.to_port);
+				undo_redo->add_undo_method(graph_plugin, "connect_nodes", shader_type, c.from_node, c.from_port, c.to_node, c.to_port);
+			}
+		}
+
 		if (p_value.get_type() == Variant::OBJECT) {
-			Ref<Resource> prev_res = node->get(p_property);
+			Ref<Resource> prev_res = vsnode->get(p_property);
 			Ref<Resource> curr_res = p_value;
 
 			if (curr_res.is_null()) {
@@ -7199,14 +7226,16 @@ public:
 			}
 		}
 		if (p_property != "constant") {
-			VisualShaderGraphPlugin *graph_plugin = editor->get_graph_plugin();
 			if (graph_plugin) {
 				undo_redo->add_do_method(editor, "_update_next_previews", node_id);
 				undo_redo->add_undo_method(editor, "_update_next_previews", node_id);
 				undo_redo->add_do_method(graph_plugin, "update_node_deferred", shader_type, node_id);
-				undo_redo->add_undo_method(graph_plugin, "update_node_deferred", shader_type, node_id);
+				if (!undo_node_already_updated) {
+					undo_redo->add_undo_method(graph_plugin, "update_node_deferred", shader_type, node_id);
+				}
 			}
 		}
+
 		undo_redo->commit_action();
 
 		updating = false;
diff --git a/editor/plugins/visual_shader_editor_plugin.h b/editor/plugins/visual_shader_editor_plugin.h
index 253ff264aa..e499bbde1e 100644
--- a/editor/plugins/visual_shader_editor_plugin.h
+++ b/editor/plugins/visual_shader_editor_plugin.h
@@ -578,7 +578,10 @@ public:
 	void update_custom_type(const Ref<Resource> &p_resource);
 
 	virtual Size2 get_minimum_size() const override;
+
 	void edit(VisualShader *p_visual_shader);
+	Ref<VisualShader> get_visual_shader() const { return visual_shader; }
+
 	VisualShaderEditor();
 };
 
diff --git a/editor/scene_tree_dock.cpp b/editor/scene_tree_dock.cpp
index 26ddec6603..757b9e72ea 100644
--- a/editor/scene_tree_dock.cpp
+++ b/editor/scene_tree_dock.cpp
@@ -76,6 +76,43 @@ void SceneTreeDock::_quick_open() {
 	instantiate_scenes(quick_open->get_selected_files(), scene_tree->get_selected());
 }
 
+void SceneTreeDock::_inspect_hovered_node() {
+	scene_tree->set_selected(node_hovered_now);
+	scene_tree->set_marked(node_hovered_now);
+	Tree *tree = scene_tree->get_scene_tree();
+	TreeItem *item = tree->get_item_at_position(tree->get_local_mouse_position());
+	if (item) {
+		item->set_as_cursor(0);
+	}
+	InspectorDock::get_inspector_singleton()->edit(node_hovered_now);
+	InspectorDock::get_inspector_singleton()->propagate_notification(NOTIFICATION_DRAG_BEGIN); // Enable inspector drag preview after it updated.
+	InspectorDock::get_singleton()->update(node_hovered_now);
+	EditorNode::get_singleton()->hide_unused_editors();
+}
+
+void SceneTreeDock::_handle_hover_to_inspect() {
+	Tree *tree = scene_tree->get_scene_tree();
+	TreeItem *item = tree->get_item_at_position(tree->get_local_mouse_position());
+
+	if (item) {
+		const NodePath &np = item->get_metadata(0);
+		node_hovered_now = get_node_or_null(np);
+		if (node_hovered_previously != node_hovered_now) {
+			inspect_hovered_node_delay->start();
+		}
+		node_hovered_previously = node_hovered_now;
+	} else {
+		_reset_hovering_timer();
+	}
+}
+
+void SceneTreeDock::_reset_hovering_timer() {
+	if (!inspect_hovered_node_delay->is_stopped()) {
+		inspect_hovered_node_delay->stop();
+	}
+	node_hovered_previously = nullptr;
+}
+
 void SceneTreeDock::input(const Ref<InputEvent> &p_event) {
 	ERR_FAIL_COND(p_event.is_null());
 
@@ -92,6 +129,17 @@ void SceneTreeDock::input(const Ref<InputEvent> &p_event) {
 			_push_item(pending_click_select);
 			pending_click_select = nullptr;
 		}
+
+		if (mb->is_released()) {
+			if (scene_tree->has_marked()) {
+				scene_tree->set_marked(nullptr);
+			}
+			_reset_hovering_timer();
+		}
+	}
+
+	if (tree_clicked && get_viewport()->gui_is_dragging()) {
+		_handle_hover_to_inspect();
 	}
 }
 
@@ -1539,6 +1587,10 @@ void SceneTreeDock::_notification(int p_what) {
 				}
 			}
 		} break;
+
+		case NOTIFICATION_DRAG_END: {
+			_reset_hovering_timer();
+		} break;
 	}
 }
 
@@ -4300,6 +4352,7 @@ SceneTreeDock::SceneTreeDock(Node *p_scene_root, EditorSelection *p_editor_selec
 	scene_tree->connect("files_dropped", callable_mp(this, &SceneTreeDock::_files_dropped));
 	scene_tree->connect("script_dropped", callable_mp(this, &SceneTreeDock::_script_dropped));
 	scene_tree->connect("nodes_dragged", callable_mp(this, &SceneTreeDock::_nodes_drag_begin));
+	scene_tree->connect("mouse_exited", callable_mp(this, &SceneTreeDock::_reset_hovering_timer));
 
 	scene_tree->get_scene_tree()->connect("gui_input", callable_mp(this, &SceneTreeDock::_scene_tree_gui_input));
 	scene_tree->get_scene_tree()->connect("item_icon_double_clicked", callable_mp(this, &SceneTreeDock::_focus_node));
@@ -4309,6 +4362,11 @@ SceneTreeDock::SceneTreeDock(Node *p_scene_root, EditorSelection *p_editor_selec
 	scene_tree->set_as_scene_tree_dock();
 	scene_tree->set_editor_selection(editor_selection);
 
+	inspect_hovered_node_delay = memnew(Timer);
+	inspect_hovered_node_delay->connect("timeout", callable_mp(this, &SceneTreeDock::_inspect_hovered_node));
+	inspect_hovered_node_delay->set_one_shot(true);
+	add_child(inspect_hovered_node_delay);
+
 	create_dialog = memnew(CreateDialog);
 	create_dialog->set_base_type("Node");
 	add_child(create_dialog);
diff --git a/editor/scene_tree_dock.h b/editor/scene_tree_dock.h
index 4c1eb5715a..86e9ff8a47 100644
--- a/editor/scene_tree_dock.h
+++ b/editor/scene_tree_dock.h
@@ -234,6 +234,14 @@ class SceneTreeDock : public VBoxContainer {
 	void _node_prerenamed(Node *p_node, const String &p_new_name);
 
 	void _nodes_drag_begin();
+
+	void _handle_hover_to_inspect();
+	void _inspect_hovered_node();
+	void _reset_hovering_timer();
+	Timer *inspect_hovered_node_delay = nullptr;
+	Node *node_hovered_now = nullptr;
+	Node *node_hovered_previously = nullptr;
+
 	virtual void input(const Ref<InputEvent> &p_event) override;
 	virtual void shortcut_input(const Ref<InputEvent> &p_event) override;
 	void _scene_tree_gui_input(Ref<InputEvent> p_event);
diff --git a/main/main.cpp b/main/main.cpp
index 6a132a37ae..357033b6d8 100644
--- a/main/main.cpp
+++ b/main/main.cpp
@@ -92,6 +92,7 @@
 #include "editor/debugger/editor_debugger_node.h"
 #include "editor/doc_data_class_path.gen.h"
 #include "editor/doc_tools.h"
+#include "editor/editor_file_system.h"
 #include "editor/editor_help.h"
 #include "editor/editor_node.h"
 #include "editor/editor_paths.h"
@@ -184,6 +185,7 @@ static OS::ProcessID editor_pid = 0;
 static bool found_project = false;
 static bool auto_build_solutions = false;
 static String debug_server_uri;
+static bool wait_for_import = false;
 #ifndef DISABLE_DEPRECATED
 static int converter_max_kb_file = 4 * 1024; // 4MB
 static int converter_max_line_length = 100000;
@@ -617,6 +619,7 @@ void Main::print_help(const char *p_binary) {
 	print_help_option("--main-loop <main_loop_name>", "Run a MainLoop specified by its global class name.\n");
 	print_help_option("--check-only", "Only parse for errors and quit (use with --script).\n");
 #ifdef TOOLS_ENABLED
+	print_help_option("--import", "Starts the editor, waits for any resources to be imported, and then quits.\n", CLI_OPTION_AVAILABILITY_EDITOR);
 	print_help_option("--export-release <preset> <path>", "Export the project in release mode using the given preset and output path. The preset name should match one defined in \"export_presets.cfg\".\n", CLI_OPTION_AVAILABILITY_EDITOR);
 	print_help_option("", "<path> should be absolute or relative to the project directory, and include the filename for the binary (e.g. \"builds/game.exe\").\n");
 	print_help_option("", "The target directory must exist.\n");
@@ -1420,12 +1423,17 @@ Error Main::setup(const char *execpath, int argc, char *argv[], bool p_second_ph
 				OS::get_singleton()->print("Missing file to load argument after --validate-extension-api, aborting.");
 				goto error;
 			}
-
+		} else if (I->get() == "--import") {
+			editor = true;
+			cmdline_tool = true;
+			wait_for_import = true;
+			quit_after = 1;
 		} else if (I->get() == "--export-release" || I->get() == "--export-debug" ||
 				I->get() == "--export-pack") { // Export project
 			// Actually handling is done in start().
 			editor = true;
 			cmdline_tool = true;
+			wait_for_import = true;
 			main_args.push_back(I->get());
 #ifndef DISABLE_DEPRECATED
 		} else if (I->get() == "--export") { // For users used to 3.x syntax.
@@ -4084,6 +4092,12 @@ bool Main::iteration() {
 		exit = true;
 	}
 
+#ifdef TOOLS_ENABLED
+	if (wait_for_import && EditorFileSystem::get_singleton()->doing_first_scan()) {
+		exit = false;
+	}
+#endif
+
 	if (fixed_fps != -1) {
 		return exit;
 	}
diff --git a/methods.py b/methods.py
index e65d2757bd..dd01469b86 100644
--- a/methods.py
+++ b/methods.py
@@ -1326,7 +1326,7 @@ def generate_vs_project(env, original_args, project_name="godot"):
 
         filters_template = filters_template.replace("%%HASH%%", md5)
 
-        with open(f"{project_name}.vcxproj.filters", "w", encoding="utf-8", newline="\n") as f:
+        with open(f"{project_name}.vcxproj.filters", "w", encoding="utf-8", newline="\r\n") as f:
             f.write(filters_template)
 
     envsources = []
@@ -1452,14 +1452,18 @@ def generate_vs_project(env, original_args, project_name="godot"):
 
         props_template = props_template.replace("%%OUTPUT%%", output)
 
-        props_template = props_template.replace(
-            "%%DEFINES%%", ";".join([format_key_value(v) for v in list(env["CPPDEFINES"])])
-        )
-        props_template = props_template.replace("%%INCLUDES%%", ";".join([str(j) for j in env["CPPPATH"]]))
-        props_template = props_template.replace(
-            "%%OPTIONS%%",
-            " ".join(env["CCFLAGS"]) + " " + " ".join([x for x in env["CXXFLAGS"] if not x.startswith("$")]),
-        )
+        proplist = [format_key_value(v) for v in list(env["CPPDEFINES"])]
+        proplist += [format_key_value(j) for j in env.get("VSHINT_DEFINES", [])]
+        props_template = props_template.replace("%%DEFINES%%", ";".join(proplist))
+
+        proplist = [str(j) for j in env["CPPPATH"]]
+        proplist += [str(j) for j in env.get("VSHINT_INCLUDES", [])]
+        props_template = props_template.replace("%%INCLUDES%%", ";".join(proplist))
+
+        proplist = env["CCFLAGS"]
+        proplist += [x for x in env["CXXFLAGS"] if not x.startswith("$")]
+        proplist += [str(j) for j in env.get("VSHINT_OPTIONS", [])]
+        props_template = props_template.replace("%%OPTIONS%%", " ".join(proplist))
 
         # Windows allows us to have spaces in paths, so we need
         # to double quote off the directory. However, the path ends
@@ -1503,7 +1507,7 @@ def generate_vs_project(env, original_args, project_name="godot"):
         props_template = props_template.replace("%%CLEAN%%", cmd)
 
         with open(
-            f"{project_name}.{platform}.{target}.{arch}.generated.props", "w", encoding="utf-8", newline="\n"
+            f"{project_name}.{platform}.{target}.{arch}.generated.props", "w", encoding="utf-8", newline="\r\n"
         ) as f:
             f.write(props_template)
 
@@ -1620,7 +1624,7 @@ def generate_vs_project(env, original_args, project_name="godot"):
         sln_template = sln_template.replace("%%SECTION1%%", "\n    ".join(section1))
         sln_template = sln_template.replace("%%SECTION2%%", "\n    ".join(section2))
 
-        with open(f"{project_name}.sln", "w", encoding="utf-8", newline="\n") as f:
+        with open(f"{project_name}.sln", "w", encoding="utf-8", newline="\r\n") as f:
             f.write(sln_template)
 
     if get_bool(original_args, "vsproj_gen_only", True):
diff --git a/misc/extension_api_validation/4.2-stable.expected b/misc/extension_api_validation/4.2-stable.expected
index bb10b422dc..9cd732813e 100644
--- a/misc/extension_api_validation/4.2-stable.expected
+++ b/misc/extension_api_validation/4.2-stable.expected
@@ -241,13 +241,6 @@ Validate extension JSON: Error: Field 'classes/AcceptDialog/methods/remove_butto
 Changed argument type to the more specific one actually expected by the method. Compatibility method registered.
 
 
-GH-89992
---------
-Validate extension JSON: Error: Field 'classes/Node/methods/replace_by/arguments': size changed value in new API, from 2 to 3.
-
-Added optional argument to prevent children to be reparented during replace_by. Compatibility method registered.
-
-
 GH-88047
 --------
 Validate extension JSON: Error: Field 'classes/AStar2D/methods/get_id_path/arguments': size changed value in new API, from 2 to 3.
diff --git a/misc/msvs/props.template b/misc/msvs/props.template
index 8facaf7f36..82e3e81f18 100644
--- a/misc/msvs/props.template
+++ b/misc/msvs/props.template
@@ -4,13 +4,13 @@
     <NMakeBuildCommandLine>%%BUILD%%</NMakeBuildCommandLine>
     <NMakeReBuildCommandLine>%%REBUILD%%</NMakeReBuildCommandLine>
     <NMakeCleanCommandLine>%%CLEAN%%</NMakeCleanCommandLine>
-    <NMakeOutput>%%OUTPUT%%</NMakeOutput>
-    <NMakePreprocessorDefinitions>%%DEFINES%%</NMakePreprocessorDefinitions>
-    <NMakeIncludeSearchPath>%%INCLUDES%%</NMakeIncludeSearchPath>
+    <NMakeOutput Condition="'$(NMakeOutput)' == ''">%%OUTPUT%%</NMakeOutput>
+    <NMakePreprocessorDefinitions>%%DEFINES%%;$(NMakePreprocessorDefinitions)</NMakePreprocessorDefinitions>
+    <NMakeIncludeSearchPath>%%INCLUDES%%;$(NMakeIncludeSearchPath)</NMakeIncludeSearchPath>
     <NMakeForcedIncludes>$(NMakeForcedIncludes)</NMakeForcedIncludes>
     <NMakeAssemblySearchPath>$(NMakeAssemblySearchPath)</NMakeAssemblySearchPath>
     <NMakeForcedUsingAssemblies>$(NMakeForcedUsingAssemblies)</NMakeForcedUsingAssemblies>
-    <AdditionalOptions>%%OPTIONS%%</AdditionalOptions>
+    <AdditionalOptions>%%OPTIONS%% $(AdditionalOptions)</AdditionalOptions>
   </PropertyGroup>
   <PropertyGroup Condition="%%CONDITION%%">
     %%PROPERTIES%%
@@ -18,4 +18,7 @@
   <ItemGroup Condition="%%CONDITION%%">
     %%EXTRA_ITEMS%%
   </ItemGroup>
-</Project>
-\ No newline at end of file
+</Project>
+<!-- CHECKSUM
+%%HASH%%
+-->
diff --git a/misc/msvs/sln.template b/misc/msvs/sln.template
index 7d05548c6e..25d9d88a49 100644
--- a/misc/msvs/sln.template
+++ b/misc/msvs/sln.template
@@ -5,16 +5,16 @@ MinimumVisualStudioVersion = 10.0.40219.1
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "%%NAME%%", "%%NAME%%.vcxproj", "{%%UUID%%}"
 EndProject
 Global
-  GlobalSection(SolutionConfigurationPlatforms) = preSolution
-    %%SECTION1%%
-  EndGlobalSection
-  GlobalSection(ProjectConfigurationPlatforms) = postSolution
-    %%SECTION2%%
-  EndGlobalSection
-  GlobalSection(SolutionProperties) = preSolution
-    HideSolutionNode = FALSE
-  EndGlobalSection
-  GlobalSection(ExtensibilityGlobals) = postSolution
-    SolutionGuid = {%%SLNUUID%%}
-  EndGlobalSection
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		%%SECTION1%%
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		%%SECTION2%%
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+	GlobalSection(ExtensibilityGlobals) = postSolution
+		SolutionGuid = {%%SLNUUID%%}
+	EndGlobalSection
 EndGlobal
diff --git a/misc/msvs/vcxproj.filters.template b/misc/msvs/vcxproj.filters.template
index d57eeee811..11d331d15d 100644
--- a/misc/msvs/vcxproj.filters.template
+++ b/misc/msvs/vcxproj.filters.template
@@ -27,4 +27,4 @@
 </Project>
 <!-- CHECKSUM
 %%HASH%%
--->
-\ No newline at end of file
+-->
diff --git a/misc/msvs/vcxproj.template b/misc/msvs/vcxproj.template
index 30f29a55f8..2cdb82d01f 100644
--- a/misc/msvs/vcxproj.template
+++ b/misc/msvs/vcxproj.template
@@ -37,4 +37,7 @@
   <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
   <ImportGroup Label="ExtensionTargets">
   </ImportGroup>
-</Project>
-\ No newline at end of file
+</Project>
+<!-- CHECKSUM
+%%HASH%%
+-->
diff --git a/modules/gdscript/doc_classes/@GDScript.xml b/modules/gdscript/doc_classes/@GDScript.xml
index 4869573972..07d917ea04 100644
--- a/modules/gdscript/doc_classes/@GDScript.xml
+++ b/modules/gdscript/doc_classes/@GDScript.xml
@@ -168,7 +168,7 @@
 				# Load a scene called "main" located in the root of the project directory and cache it in a variable.
 				var main = load("res://main.tscn") # main will contain a PackedScene resource.
 				[/codeblock]
-				[b]Important:[/b] The path must be absolute. A relative path will always return [code]null[/code].
+				[b]Important:[/b] Relative paths are [i]not[/i] relative to the script calling this method, instead it is prefixed with [code]"res://"[/code]. Loading from relative paths might not work as expected.
 				This function is a simplified version of [method ResourceLoader.load], which can be used for more advanced scenarios.
 				[b]Note:[/b] Files have to be imported into the engine first to load them using this function. If you want to load [Image]s at run-time, you may use [method Image.load]. If you want to import audio files, you can use the snippet described in [member AudioStreamMP3.data].
 				[b]Note:[/b] If [member ProjectSettings.editor/export/convert_text_resources_to_binary] is [code]true[/code], [method @GDScript.load] will not be able to read converted files in an exported project. If you rely on run-time loading of files present within the PCK, set [member ProjectSettings.editor/export/convert_text_resources_to_binary] to [code]false[/code].
diff --git a/modules/gdscript/editor/gdscript_highlighter.cpp b/modules/gdscript/editor/gdscript_highlighter.cpp
index 439555bacb..f83b784f85 100644
--- a/modules/gdscript/editor/gdscript_highlighter.cpp
+++ b/modules/gdscript/editor/gdscript_highlighter.cpp
@@ -53,7 +53,6 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 	bool in_keyword = false;
 	bool in_word = false;
 	bool in_number = false;
-	bool in_raw_string = false;
 	bool in_node_path = false;
 	bool in_node_ref = false;
 	bool in_annotation = false;
@@ -65,8 +64,10 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 
 	bool in_function_name = false; // Any call.
 	bool in_function_declaration = false; // Only declaration.
-	bool in_var_const_declaration = false;
 	bool in_signal_declaration = false;
+	bool is_after_func_signal_declaration = false;
+	bool in_var_const_declaration = false;
+	bool is_after_var_const_declaration = false;
 	bool expect_type = false;
 
 	int in_declaration_params = 0; // The number of opened `(` after func/signal name.
@@ -125,6 +126,7 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 			if (from != line_length) {
 				// Check if we are in entering a region.
 				if (in_region == -1) {
+					const bool r_prefix = from > 0 && str[from - 1] == 'r';
 					for (int c = 0; c < color_regions.size(); c++) {
 						// Check there is enough room.
 						int chars_left = line_length - from;
@@ -134,6 +136,10 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 							continue;
 						}
 
+						if (color_regions[c].is_string && color_regions[c].r_prefix != r_prefix) {
+							continue;
+						}
+
 						// Search the line.
 						bool match = true;
 						const char32_t *start_key = color_regions[c].start_key.get_data();
@@ -152,7 +158,7 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 						// Check if it's the whole line.
 						if (end_key_length == 0 || color_regions[c].line_only || from + end_key_length > line_length) {
 							// Don't skip comments, for highlighting markers.
-							if (color_regions[in_region].type == ColorRegion::TYPE_COMMENT) {
+							if (color_regions[in_region].is_comment) {
 								break;
 							}
 							if (from + end_key_length > line_length) {
@@ -174,7 +180,7 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 					}
 
 					// Don't skip comments, for highlighting markers.
-					if (j == line_length && color_regions[in_region].type != ColorRegion::TYPE_COMMENT) {
+					if (j == line_length && !color_regions[in_region].is_comment) {
 						continue;
 					}
 				}
@@ -196,7 +202,7 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 					highlighter_info["color"] = region_color;
 					color_map[j] = highlighter_info;
 
-					if (color_regions[in_region].type == ColorRegion::TYPE_COMMENT) {
+					if (color_regions[in_region].is_comment) {
 						int marker_start_pos = from;
 						int marker_len = 0;
 						while (from <= line_length) {
@@ -240,7 +246,7 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 							}
 
 							if (str[from] == '\\') {
-								if (!in_raw_string) {
+								if (!color_regions[in_region].r_prefix) {
 									Dictionary escape_char_highlighter_info;
 									escape_char_highlighter_info["color"] = symbol_color;
 									color_map[from] = escape_char_highlighter_info;
@@ -248,7 +254,7 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 
 								from++;
 
-								if (!in_raw_string) {
+								if (!color_regions[in_region].r_prefix) {
 									int esc_len = 0;
 									if (str[from] == 'u') {
 										esc_len = 4;
@@ -410,6 +416,8 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 				col = class_names[word];
 			} else if (reserved_keywords.has(word)) {
 				col = reserved_keywords[word];
+				// Don't highlight `list` as a type in `for elem: Type in list`.
+				expect_type = false;
 			} else if (member_keywords.has(word)) {
 				col = member_keywords[word];
 			}
@@ -480,6 +488,13 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 		}
 
 		if (is_a_symbol) {
+			if (in_function_declaration || in_signal_declaration) {
+				is_after_func_signal_declaration = true;
+			}
+			if (in_var_const_declaration) {
+				is_after_var_const_declaration = true;
+			}
+
 			if (in_declaration_params > 0) {
 				switch (str[j]) {
 					case '(':
@@ -495,7 +510,7 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 						in_declaration_param_dicts -= 1;
 						break;
 				}
-			} else if ((in_function_declaration || in_signal_declaration || prev_text == GDScriptTokenizer::get_token_name(GDScriptTokenizer::Token::FUNC)) && str[j] == '(') {
+			} else if ((is_after_func_signal_declaration || prev_text == GDScriptTokenizer::get_token_name(GDScriptTokenizer::Token::FUNC)) && str[j] == '(') {
 				in_declaration_params = 1;
 				in_declaration_param_dicts = 0;
 			}
@@ -526,28 +541,25 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 					expect_type = true;
 					in_type_params = 0;
 				}
-				if ((in_var_const_declaration || (in_declaration_params == 1 && in_declaration_param_dicts == 0)) && str[j] == ':') {
+				if ((is_after_var_const_declaration || (in_declaration_params == 1 && in_declaration_param_dicts == 0)) && str[j] == ':') {
 					expect_type = true;
 					in_type_params = 0;
 				}
 			}
 
+			in_function_name = false;
+			in_function_declaration = false;
+			in_signal_declaration = false;
+			in_var_const_declaration = false;
+			in_lambda = false;
+			in_member_variable = false;
+
 			if (!is_whitespace(str[j])) {
-				in_function_declaration = false;
-				in_var_const_declaration = false;
-				in_signal_declaration = false;
-				in_function_name = false;
-				in_lambda = false;
-				in_member_variable = false;
+				is_after_func_signal_declaration = false;
+				is_after_var_const_declaration = false;
 			}
 		}
 
-		if (!in_raw_string && in_region == -1 && str[j] == 'r' && j < line_length - 1 && (str[j + 1] == '"' || str[j + 1] == '\'')) {
-			in_raw_string = true;
-		} else if (in_raw_string && in_region == -1) {
-			in_raw_string = false;
-		}
-
 		// Keep symbol color for binary '&&'. In the case of '&&&' use StringName color for the last ampersand.
 		if (!in_string_name && in_region == -1 && str[j] == '&' && !is_binary_op) {
 			if (j >= 2 && str[j - 1] == '&' && str[j - 2] != '&' && prev_is_binary_op) {
@@ -579,7 +591,9 @@ Dictionary GDScriptSyntaxHighlighter::_get_line_syntax_highlighting_impl(int p_l
 			in_annotation = false;
 		}
 
-		if (in_raw_string) {
+		const bool in_raw_string_prefix = in_region == -1 && str[j] == 'r' && j + 1 < line_length && (str[j + 1] == '"' || str[j + 1] == '\'');
+
+		if (in_raw_string_prefix) {
 			color = string_color;
 		} else if (in_node_ref) {
 			next_type = NODE_REF;
@@ -781,6 +795,10 @@ void GDScriptSyntaxHighlighter::_update_cache() {
 	add_color_region(ColorRegion::TYPE_STRING, "'", "'", string_color);
 	add_color_region(ColorRegion::TYPE_MULTILINE_STRING, "\"\"\"", "\"\"\"", string_color);
 	add_color_region(ColorRegion::TYPE_MULTILINE_STRING, "'''", "'''", string_color);
+	add_color_region(ColorRegion::TYPE_STRING, "\"", "\"", string_color, false, true);
+	add_color_region(ColorRegion::TYPE_STRING, "'", "'", string_color, false, true);
+	add_color_region(ColorRegion::TYPE_MULTILINE_STRING, "\"\"\"", "\"\"\"", string_color, false, true);
+	add_color_region(ColorRegion::TYPE_MULTILINE_STRING, "'''", "'''", string_color, false, true);
 
 	const Ref<Script> scr = _get_edited_resource();
 	if (scr.is_valid()) {
@@ -913,7 +931,7 @@ void GDScriptSyntaxHighlighter::_update_cache() {
 	}
 }
 
-void GDScriptSyntaxHighlighter::add_color_region(ColorRegion::Type p_type, const String &p_start_key, const String &p_end_key, const Color &p_color, bool p_line_only) {
+void GDScriptSyntaxHighlighter::add_color_region(ColorRegion::Type p_type, const String &p_start_key, const String &p_end_key, const Color &p_color, bool p_line_only, bool p_r_prefix) {
 	ERR_FAIL_COND_MSG(p_start_key.is_empty(), "Color region start key cannot be empty.");
 	ERR_FAIL_COND_MSG(!is_symbol(p_start_key[0]), "Color region start key must start with a symbol.");
 
@@ -922,9 +940,9 @@ void GDScriptSyntaxHighlighter::add_color_region(ColorRegion::Type p_type, const
 	}
 
 	int at = 0;
-	for (int i = 0; i < color_regions.size(); i++) {
-		ERR_FAIL_COND_MSG(color_regions[i].start_key == p_start_key, "Color region with start key '" + p_start_key + "' already exists.");
-		if (p_start_key.length() < color_regions[i].start_key.length()) {
+	for (const ColorRegion &region : color_regions) {
+		ERR_FAIL_COND_MSG(region.start_key == p_start_key && region.r_prefix == p_r_prefix, "Color region with start key '" + p_start_key + "' already exists.");
+		if (p_start_key.length() < region.start_key.length()) {
 			at++;
 		} else {
 			break;
@@ -937,6 +955,9 @@ void GDScriptSyntaxHighlighter::add_color_region(ColorRegion::Type p_type, const
 	color_region.start_key = p_start_key;
 	color_region.end_key = p_end_key;
 	color_region.line_only = p_line_only;
+	color_region.r_prefix = p_r_prefix;
+	color_region.is_string = p_type == ColorRegion::TYPE_STRING || p_type == ColorRegion::TYPE_MULTILINE_STRING;
+	color_region.is_comment = p_type == ColorRegion::TYPE_COMMENT || p_type == ColorRegion::TYPE_CODE_REGION;
 	color_regions.insert(at, color_region);
 	clear_highlighting_cache();
 }
diff --git a/modules/gdscript/editor/gdscript_highlighter.h b/modules/gdscript/editor/gdscript_highlighter.h
index eb7bb7d801..655557b3d9 100644
--- a/modules/gdscript/editor/gdscript_highlighter.h
+++ b/modules/gdscript/editor/gdscript_highlighter.h
@@ -52,6 +52,9 @@ private:
 		String start_key;
 		String end_key;
 		bool line_only = false;
+		bool r_prefix = false;
+		bool is_string = false; // `TYPE_STRING` or `TYPE_MULTILINE_STRING`.
+		bool is_comment = false; // `TYPE_COMMENT` or `TYPE_CODE_REGION`.
 	};
 	Vector<ColorRegion> color_regions;
 	HashMap<int, int> color_region_cache;
@@ -103,7 +106,7 @@ private:
 	Color comment_marker_colors[COMMENT_MARKER_MAX];
 	HashMap<String, CommentMarkerLevel> comment_markers;
 
-	void add_color_region(ColorRegion::Type p_type, const String &p_start_key, const String &p_end_key, const Color &p_color, bool p_line_only = false);
+	void add_color_region(ColorRegion::Type p_type, const String &p_start_key, const String &p_end_key, const Color &p_color, bool p_line_only = false, bool p_r_prefix = false);
 
 public:
 	virtual void _update_cache() override;
diff --git a/modules/gdscript/gdscript_analyzer.cpp b/modules/gdscript/gdscript_analyzer.cpp
index f67f4913c3..a30a87f3a1 100644
--- a/modules/gdscript/gdscript_analyzer.cpp
+++ b/modules/gdscript/gdscript_analyzer.cpp
@@ -1973,8 +1973,6 @@ void GDScriptAnalyzer::resolve_variable(GDScriptParser::VariableNode *p_variable
 	if (p_is_local) {
 		if (p_variable->usages == 0 && !String(p_variable->identifier->name).begins_with("_")) {
 			parser->push_warning(p_variable, GDScriptWarning::UNUSED_VARIABLE, p_variable->identifier->name);
-		} else if (p_variable->assignments == 0) {
-			parser->push_warning(p_variable, GDScriptWarning::UNASSIGNED_VARIABLE, p_variable->identifier->name);
 		}
 	}
 	is_shadowing(p_variable->identifier, kind, p_is_local);
@@ -2615,9 +2613,21 @@ void GDScriptAnalyzer::update_array_literal_element_type(GDScriptParser::ArrayNo
 }
 
 void GDScriptAnalyzer::reduce_assignment(GDScriptParser::AssignmentNode *p_assignment) {
-	reduce_expression(p_assignment->assignee);
 	reduce_expression(p_assignment->assigned_value);
 
+#ifdef DEBUG_ENABLED
+	// Increment assignment count for local variables.
+	// Before we reduce the assignee because we don't want to warn about not being assigned when performing the assignment.
+	if (p_assignment->assignee->type == GDScriptParser::Node::IDENTIFIER) {
+		GDScriptParser::IdentifierNode *id = static_cast<GDScriptParser::IdentifierNode *>(p_assignment->assignee);
+		if (id->source == GDScriptParser::IdentifierNode::LOCAL_VARIABLE && id->variable_source) {
+			id->variable_source->assignments++;
+		}
+	}
+#endif
+
+	reduce_expression(p_assignment->assignee);
+
 	if (p_assignment->assigned_value == nullptr || p_assignment->assignee == nullptr) {
 		return;
 	}
@@ -2754,6 +2764,14 @@ void GDScriptAnalyzer::reduce_assignment(GDScriptParser::AssignmentNode *p_assig
 	if (assignee_type.is_hard_type() && assignee_type.builtin_type == Variant::INT && assigned_value_type.builtin_type == Variant::FLOAT) {
 		parser->push_warning(p_assignment->assigned_value, GDScriptWarning::NARROWING_CONVERSION);
 	}
+	// Check for assignment with operation before assignment.
+	if (p_assignment->operation != GDScriptParser::AssignmentNode::OP_NONE && p_assignment->assignee->type == GDScriptParser::Node::IDENTIFIER) {
+		GDScriptParser::IdentifierNode *id = static_cast<GDScriptParser::IdentifierNode *>(p_assignment->assignee);
+		// Use == 1 here because this assignment was already counted in the beginning of the function.
+		if (id->source == GDScriptParser::IdentifierNode::LOCAL_VARIABLE && id->variable_source && id->variable_source->assignments == 1) {
+			parser->push_warning(p_assignment, GDScriptWarning::UNASSIGNED_VARIABLE_OP_ASSIGN, id->name);
+		}
+	}
 #endif
 }
 
@@ -2846,6 +2864,11 @@ void GDScriptAnalyzer::reduce_binary_op(GDScriptParser::BinaryOpNode *p_binary_o
 		result.type_source = GDScriptParser::DataType::ANNOTATED_EXPLICIT;
 		result.kind = GDScriptParser::DataType::BUILTIN;
 		result.builtin_type = Variant::BOOL;
+	} else if (p_binary_op->variant_op == Variant::OP_MODULE && left_type.builtin_type == Variant::STRING) {
+		// The modulo operator (%) on string acts as formatting and will always return a string.
+		result.type_source = left_type.type_source;
+		result.kind = GDScriptParser::DataType::BUILTIN;
+		result.builtin_type = Variant::STRING;
 	} else if (left_type.is_variant() || right_type.is_variant()) {
 		// Cannot infer type because one operand can be anything.
 		result.kind = GDScriptParser::DataType::VARIANT;
@@ -3439,9 +3462,7 @@ void GDScriptAnalyzer::reduce_cast(GDScriptParser::CastNode *p_cast) {
 		if (op_type.is_variant() || !op_type.is_hard_type()) {
 			mark_node_unsafe(p_cast);
 #ifdef DEBUG_ENABLED
-			if (op_type.is_variant() && !op_type.is_hard_type()) {
-				parser->push_warning(p_cast, GDScriptWarning::UNSAFE_CAST, cast_type.to_string());
-			}
+			parser->push_warning(p_cast, GDScriptWarning::UNSAFE_CAST, cast_type.to_string());
 #endif
 		} else {
 			bool valid = false;
@@ -3926,6 +3947,11 @@ void GDScriptAnalyzer::reduce_identifier(GDScriptParser::IdentifierNode *p_ident
 		case GDScriptParser::IdentifierNode::LOCAL_VARIABLE:
 			p_identifier->set_datatype(p_identifier->variable_source->get_datatype());
 			found_source = true;
+#ifdef DEBUG_ENABLED
+			if (p_identifier->variable_source && p_identifier->variable_source->assignments == 0 && !(p_identifier->get_datatype().is_hard_type() && p_identifier->get_datatype().kind == GDScriptParser::DataType::BUILTIN)) {
+				parser->push_warning(p_identifier, GDScriptWarning::UNASSIGNED_VARIABLE, p_identifier->name);
+			}
+#endif
 			break;
 		case GDScriptParser::IdentifierNode::LOCAL_ITERATOR:
 			p_identifier->set_datatype(p_identifier->bind_source->get_datatype());
diff --git a/modules/gdscript/gdscript_editor.cpp b/modules/gdscript/gdscript_editor.cpp
index 854c944e14..eaef8a961c 100644
--- a/modules/gdscript/gdscript_editor.cpp
+++ b/modules/gdscript/gdscript_editor.cpp
@@ -651,6 +651,21 @@ static int _get_enum_constant_location(const StringName &p_class, const StringNa
 	return depth | ScriptLanguage::LOCATION_PARENT_MASK;
 }
 
+static int _get_enum_location(const StringName &p_class, const StringName &p_enum) {
+	if (!ClassDB::has_enum(p_class, p_enum)) {
+		return ScriptLanguage::LOCATION_OTHER;
+	}
+
+	int depth = 0;
+	StringName class_test = p_class;
+	while (class_test && !ClassDB::has_enum(class_test, p_enum, true)) {
+		class_test = ClassDB::get_parent_class(class_test);
+		depth++;
+	}
+
+	return depth | ScriptLanguage::LOCATION_PARENT_MASK;
+}
+
 // END LOCATION METHODS
 
 static String _trim_parent_class(const String &p_class, const String &p_base_class) {
@@ -1202,13 +1217,15 @@ static void _find_identifiers_in_base(const GDScriptCompletionIdentifier &p_base
 					return;
 				}
 
+				List<StringName> enums;
+				ClassDB::get_enum_list(type, &enums);
+				for (const StringName &E : enums) {
+					int location = p_recursion_depth + _get_enum_location(type, E);
+					ScriptLanguage::CodeCompletionOption option(E, ScriptLanguage::CODE_COMPLETION_KIND_ENUM, location);
+					r_result.insert(option.display, option);
+				}
+
 				if (p_types_only) {
-					List<StringName> enums;
-					ClassDB::get_enum_list(type, &enums);
-					for (const StringName &E : enums) {
-						ScriptLanguage::CodeCompletionOption option(E, ScriptLanguage::CODE_COMPLETION_KIND_ENUM);
-						r_result.insert(option.display, option);
-					}
 					return;
 				}
 
@@ -1268,7 +1285,20 @@ static void _find_identifiers_in_base(const GDScriptCompletionIdentifier &p_base
 				}
 				return;
 			} break;
-			case GDScriptParser::DataType::ENUM:
+			case GDScriptParser::DataType::ENUM: {
+				String type_str = base_type.native_type;
+				StringName type = type_str.get_slicec('.', 0);
+				StringName type_enum = base_type.enum_type;
+
+				List<StringName> enum_values;
+				ClassDB::get_enum_constants(type, type_enum, &enum_values);
+				for (const StringName &E : enum_values) {
+					int location = p_recursion_depth + _get_enum_constant_location(type, E);
+					ScriptLanguage::CodeCompletionOption option(E, ScriptLanguage::CODE_COMPLETION_KIND_CONSTANT, location);
+					r_result.insert(option.display, option);
+				}
+			}
+				[[fallthrough]];
 			case GDScriptParser::DataType::BUILTIN: {
 				if (p_types_only) {
 					return;
diff --git a/modules/gdscript/gdscript_parser.cpp b/modules/gdscript/gdscript_parser.cpp
index d706f4e9a3..173bff575a 100644
--- a/modules/gdscript/gdscript_parser.cpp
+++ b/modules/gdscript/gdscript_parser.cpp
@@ -2769,10 +2769,6 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_assignment(ExpressionNode
 		return parse_expression(false); // Return the following expression.
 	}
 
-#ifdef DEBUG_ENABLED
-	VariableNode *source_variable = nullptr;
-#endif
-
 	switch (p_previous_operand->type) {
 		case Node::IDENTIFIER: {
 #ifdef DEBUG_ENABLED
@@ -2781,8 +2777,6 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_assignment(ExpressionNode
 			IdentifierNode *id = static_cast<IdentifierNode *>(p_previous_operand);
 			switch (id->source) {
 				case IdentifierNode::LOCAL_VARIABLE:
-
-					source_variable = id->variable_source;
 					id->variable_source->usages--;
 					break;
 				case IdentifierNode::LOCAL_CONSTANT:
@@ -2813,16 +2807,10 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_assignment(ExpressionNode
 	update_extents(assignment);
 
 	make_completion_context(COMPLETION_ASSIGN, assignment);
-#ifdef DEBUG_ENABLED
-	bool has_operator = true;
-#endif
 	switch (previous.type) {
 		case GDScriptTokenizer::Token::EQUAL:
 			assignment->operation = AssignmentNode::OP_NONE;
 			assignment->variant_op = Variant::OP_MAX;
-#ifdef DEBUG_ENABLED
-			has_operator = false;
-#endif
 			break;
 		case GDScriptTokenizer::Token::PLUS_EQUAL:
 			assignment->operation = AssignmentNode::OP_ADDITION;
@@ -2878,16 +2866,6 @@ GDScriptParser::ExpressionNode *GDScriptParser::parse_assignment(ExpressionNode
 	}
 	complete_extents(assignment);
 
-#ifdef DEBUG_ENABLED
-	if (source_variable != nullptr) {
-		if (has_operator && source_variable->assignments == 0) {
-			push_warning(assignment, GDScriptWarning::UNASSIGNED_VARIABLE_OP_ASSIGN, source_variable->identifier->name);
-		}
-
-		source_variable->assignments += 1;
-	}
-#endif
-
 	return assignment;
 }
 
diff --git a/modules/gdscript/gdscript_warning.cpp b/modules/gdscript/gdscript_warning.cpp
index e7d9787eab..708966a0a8 100644
--- a/modules/gdscript/gdscript_warning.cpp
+++ b/modules/gdscript/gdscript_warning.cpp
@@ -40,7 +40,7 @@ String GDScriptWarning::get_message() const {
 	switch (code) {
 		case UNASSIGNED_VARIABLE:
 			CHECK_SYMBOLS(1);
-			return vformat(R"(The variable "%s" was used but never assigned a value.)", symbols[0]);
+			return vformat(R"(The variable "%s" was used before being assigned a value.)", symbols[0]);
 		case UNASSIGNED_VARIABLE_OP_ASSIGN:
 			CHECK_SYMBOLS(1);
 			return vformat(R"(Using assignment with operation but the variable "%s" was not previously assigned a value.)", symbols[0]);
@@ -96,7 +96,7 @@ String GDScriptWarning::get_message() const {
 			return vformat(R"*(The method "%s()" is not present on the inferred type "%s" (but may be present on a subtype).)*", symbols[0], symbols[1]);
 		case UNSAFE_CAST:
 			CHECK_SYMBOLS(1);
-			return vformat(R"(The value is cast to "%s" but has an unknown type.)", symbols[0]);
+			return vformat(R"(Casting "Variant" to "%s" is unsafe.)", symbols[0]);
 		case UNSAFE_CALL_ARGUMENT:
 			CHECK_SYMBOLS(5);
 			return vformat(R"*(The argument %s of the %s "%s()" requires the subtype "%s" but the supertype "%s" was provided.)*", symbols[0], symbols[1], symbols[2], symbols[3], symbols[4]);
diff --git a/modules/gdscript/gdscript_warning.h b/modules/gdscript/gdscript_warning.h
index 69cc8c179f..93c232a0f8 100644
--- a/modules/gdscript/gdscript_warning.h
+++ b/modules/gdscript/gdscript_warning.h
@@ -65,7 +65,7 @@ public:
 		INFERRED_DECLARATION, // Variable/constant/parameter has an implicitly inferred static type.
 		UNSAFE_PROPERTY_ACCESS, // Property not found in the detected type (but can be in subtypes).
 		UNSAFE_METHOD_ACCESS, // Function not found in the detected type (but can be in subtypes).
-		UNSAFE_CAST, // Cast used in an unknown type.
+		UNSAFE_CAST, // Casting a `Variant` value to non-`Variant`.
 		UNSAFE_CALL_ARGUMENT, // Function call argument is of a supertype of the required type.
 		UNSAFE_VOID_RETURN, // Function returns void but returned a call to a function that can't be type checked.
 		RETURN_VALUE_DISCARDED, // Function call returns something but the value isn't used.
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_array.gd b/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_array.gd
new file mode 100644
index 0000000000..b53e814eea
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_array.gd
@@ -0,0 +1,3 @@
+func test():
+	var integer := 1
+	print(integer as Array)
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_array.out b/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_array.out
new file mode 100644
index 0000000000..e3e82c2b7e
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_array.out
@@ -0,0 +1,2 @@
+GDTEST_ANALYZER_ERROR
+Invalid cast. Cannot convert from "int" to "Array".
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_object.gd b/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_object.gd
new file mode 100644
index 0000000000..323e367f8e
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_object.gd
@@ -0,0 +1,3 @@
+func test():
+	var integer := 1
+	print(integer as Node)
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_object.out b/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_object.out
new file mode 100644
index 0000000000..7de40418bf
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/cast_int_to_object.out
@@ -0,0 +1,2 @@
+GDTEST_ANALYZER_ERROR
+Invalid cast. Cannot convert from "int" to "Node".
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/cast_object_to_int.gd b/modules/gdscript/tests/scripts/analyzer/errors/cast_object_to_int.gd
new file mode 100644
index 0000000000..f6cd5e217e
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/cast_object_to_int.gd
@@ -0,0 +1,3 @@
+func test():
+	var object := RefCounted.new()
+	print(object as int)
diff --git a/modules/gdscript/tests/scripts/analyzer/errors/cast_object_to_int.out b/modules/gdscript/tests/scripts/analyzer/errors/cast_object_to_int.out
new file mode 100644
index 0000000000..8af0847577
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/errors/cast_object_to_int.out
@@ -0,0 +1,2 @@
+GDTEST_ANALYZER_ERROR
+Invalid cast. Cannot convert from "RefCounted" to "int".
diff --git a/modules/gdscript/tests/scripts/analyzer/features/enum_typecheck_inner_class.gd b/modules/gdscript/tests/scripts/analyzer/features/enum_typecheck_inner_class.gd
index 1c4b19d8e0..0444051831 100644
--- a/modules/gdscript/tests/scripts/analyzer/features/enum_typecheck_inner_class.gd
+++ b/modules/gdscript/tests/scripts/analyzer/features/enum_typecheck_inner_class.gd
@@ -11,6 +11,7 @@ class InnerClass:
 		var e2: InnerClass.MyEnum
 		var e3: EnumTypecheckOuterClass.InnerClass.MyEnum
 
+		@warning_ignore("unassigned_variable")
 		print("Self ", e1, e2, e3)
 		e1 = MyEnum.V1
 		e2 = MyEnum.V1
@@ -48,6 +49,7 @@ func test_outer_from_outer():
 	var e1: MyEnum
 	var e2: EnumTypecheckOuterClass.MyEnum
 
+	@warning_ignore("unassigned_variable")
 	print("Self ", e1, e2)
 	e1 = MyEnum.V1
 	e2 = MyEnum.V1
@@ -66,6 +68,7 @@ func test_inner_from_outer():
 	var e1: InnerClass.MyEnum
 	var e2: EnumTypecheckOuterClass.InnerClass.MyEnum
 
+	@warning_ignore("unassigned_variable")
 	print("Inner ", e1, e2)
 	e1 = InnerClass.MyEnum.V1
 	e2 = InnerClass.MyEnum.V1
diff --git a/modules/gdscript/tests/scripts/analyzer/features/infer_type_on_string_format.gd b/modules/gdscript/tests/scripts/analyzer/features/infer_type_on_string_format.gd
new file mode 100644
index 0000000000..c83a3a8a14
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/infer_type_on_string_format.gd
@@ -0,0 +1,6 @@
+# GH-88082
+
+func test():
+	var x = 1
+	var message := "value: %s" % x
+	print(message)
diff --git a/modules/gdscript/tests/scripts/analyzer/features/infer_type_on_string_format.out b/modules/gdscript/tests/scripts/analyzer/features/infer_type_on_string_format.out
new file mode 100644
index 0000000000..cf6464a4c3
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/infer_type_on_string_format.out
@@ -0,0 +1,2 @@
+GDTEST_OK
+value: 1
diff --git a/modules/gdscript/tests/scripts/analyzer/features/unassigned_builtin_typed.gd b/modules/gdscript/tests/scripts/analyzer/features/unassigned_builtin_typed.gd
new file mode 100644
index 0000000000..8099b366f3
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/unassigned_builtin_typed.gd
@@ -0,0 +1,7 @@
+# GH-88117, GH-85796
+
+func test():
+	var array: Array
+	# Should not emit unassigned warning because the Array type has a default value.
+	array.assign([1, 2, 3])
+	print(array)
diff --git a/modules/gdscript/tests/scripts/analyzer/features/unassigned_builtin_typed.out b/modules/gdscript/tests/scripts/analyzer/features/unassigned_builtin_typed.out
new file mode 100644
index 0000000000..6d85a6cc07
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/features/unassigned_builtin_typed.out
@@ -0,0 +1,2 @@
+GDTEST_OK
+[1, 2, 3]
diff --git a/modules/gdscript/tests/scripts/analyzer/features/warning_ignore_warnings.gd b/modules/gdscript/tests/scripts/analyzer/features/warning_ignore_warnings.gd
index 8a1ab6f406..333950d64e 100644
--- a/modules/gdscript/tests/scripts/analyzer/features/warning_ignore_warnings.gd
+++ b/modules/gdscript/tests/scripts/analyzer/features/warning_ignore_warnings.gd
@@ -31,8 +31,8 @@ func int_func() -> int:
 func test_warnings(unused_private_class_variable):
 	var t = 1
 
-	@warning_ignore("unassigned_variable")
 	var unassigned_variable
+	@warning_ignore("unassigned_variable")
 	print(unassigned_variable)
 
 	var _unassigned_variable_op_assign
diff --git a/modules/gdscript/tests/scripts/analyzer/warnings/unsafe_cast.gd b/modules/gdscript/tests/scripts/analyzer/warnings/unsafe_cast.gd
new file mode 100644
index 0000000000..1a6d10f8f7
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/warnings/unsafe_cast.gd
@@ -0,0 +1,24 @@
+# We don't want to execute it because of errors, just analyze.
+func no_exec_test():
+	var weak_int = 1
+	print(weak_int as Variant) # No warning.
+	print(weak_int as int)
+	print(weak_int as Node)
+
+	var weak_node = Node.new()
+	print(weak_node as Variant) # No warning.
+	print(weak_node as int)
+	print(weak_node as Node)
+
+	var weak_variant = null
+	print(weak_variant as Variant) # No warning.
+	print(weak_variant as int)
+	print(weak_variant as Node)
+
+	var hard_variant: Variant = null
+	print(hard_variant as Variant) # No warning.
+	print(hard_variant as int)
+	print(hard_variant as Node)
+
+func test():
+	pass
diff --git a/modules/gdscript/tests/scripts/analyzer/warnings/unsafe_cast.out b/modules/gdscript/tests/scripts/analyzer/warnings/unsafe_cast.out
new file mode 100644
index 0000000000..c1e683d942
--- /dev/null
+++ b/modules/gdscript/tests/scripts/analyzer/warnings/unsafe_cast.out
@@ -0,0 +1,33 @@
+GDTEST_OK
+>> WARNING
+>> Line: 5
+>> UNSAFE_CAST
+>> Casting "Variant" to "int" is unsafe.
+>> WARNING
+>> Line: 6
+>> UNSAFE_CAST
+>> Casting "Variant" to "Node" is unsafe.
+>> WARNING
+>> Line: 10
+>> UNSAFE_CAST
+>> Casting "Variant" to "int" is unsafe.
+>> WARNING
+>> Line: 11
+>> UNSAFE_CAST
+>> Casting "Variant" to "Node" is unsafe.
+>> WARNING
+>> Line: 15
+>> UNSAFE_CAST
+>> Casting "Variant" to "int" is unsafe.
+>> WARNING
+>> Line: 16
+>> UNSAFE_CAST
+>> Casting "Variant" to "Node" is unsafe.
+>> WARNING
+>> Line: 20
+>> UNSAFE_CAST
+>> Casting "Variant" to "int" is unsafe.
+>> WARNING
+>> Line: 21
+>> UNSAFE_CAST
+>> Casting "Variant" to "Node" is unsafe.
diff --git a/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_autocomplete.cfg b/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_autocomplete.cfg
new file mode 100644
index 0000000000..7c7b465f26
--- /dev/null
+++ b/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_autocomplete.cfg
@@ -0,0 +1,9 @@
+[output]
+include=[
+    {"display": "DrawMode",
+     "location": 256},
+    {"display": "Anchor",
+     "location": 257},
+    {"display": "TextureRepeat",
+     "location": 258},
+]
diff --git a/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_autocomplete.gd b/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_autocomplete.gd
new file mode 100644
index 0000000000..83f4b17a86
--- /dev/null
+++ b/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_autocomplete.gd
@@ -0,0 +1,4 @@
+extends Control
+
+func _ready():
+    var t = BaseButton.➡
diff --git a/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_values_autocompletion.cfg b/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_values_autocompletion.cfg
new file mode 100644
index 0000000000..7ccfa550e2
--- /dev/null
+++ b/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_values_autocompletion.cfg
@@ -0,0 +1,5 @@
+[output]
+include=[
+    {"display": "HEURISTIC_MAX"},
+    {"display": "HEURISTIC_OCTILE"},
+]
diff --git a/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_values_autocompletion.gd b/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_values_autocompletion.gd
new file mode 100644
index 0000000000..99e38be6b9
--- /dev/null
+++ b/modules/gdscript/tests/scripts/completion/builtin_enum/builtin_enum_values_autocompletion.gd
@@ -0,0 +1,4 @@
+extends Control
+
+func _ready():
+    AStarGrid2D.Heuristic.➡
diff --git a/modules/gdscript/tests/scripts/parser/warnings/unassigned_variable.gd b/modules/gdscript/tests/scripts/parser/warnings/unassigned_variable.gd
index afb5059eea..b38cffb754 100644
--- a/modules/gdscript/tests/scripts/parser/warnings/unassigned_variable.gd
+++ b/modules/gdscript/tests/scripts/parser/warnings/unassigned_variable.gd
@@ -1,2 +1,11 @@
 func test():
-	var __
+	var unassigned
+	print(unassigned)
+	unassigned = "something" # Assigned only after use.
+
+	var a
+	print(a) # Unassigned, warn.
+	if a: # Still unassigned, warn.
+		a = 1
+		print(a) # Assigned (dead code), don't warn.
+	print(a) # "Maybe" assigned, don't warn.
diff --git a/modules/gdscript/tests/scripts/parser/warnings/unassigned_variable.out b/modules/gdscript/tests/scripts/parser/warnings/unassigned_variable.out
index 10f89be132..36db304ef4 100644
--- a/modules/gdscript/tests/scripts/parser/warnings/unassigned_variable.out
+++ b/modules/gdscript/tests/scripts/parser/warnings/unassigned_variable.out
@@ -1,5 +1,16 @@
 GDTEST_OK
 >> WARNING
->> Line: 2
+>> Line: 3
 >> UNASSIGNED_VARIABLE
->> The variable "__" was used but never assigned a value.
+>> The variable "unassigned" was used before being assigned a value.
+>> WARNING
+>> Line: 7
+>> UNASSIGNED_VARIABLE
+>> The variable "a" was used before being assigned a value.
+>> WARNING
+>> Line: 8
+>> UNASSIGNED_VARIABLE
+>> The variable "a" was used before being assigned a value.
+<null>
+<null>
+<null>
diff --git a/modules/gdscript/tests/scripts/runtime/errors/cast_freed_object.gd b/modules/gdscript/tests/scripts/runtime/errors/cast_freed_object.gd
new file mode 100644
index 0000000000..6b766f4d3d
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/errors/cast_freed_object.gd
@@ -0,0 +1,4 @@
+func test():
+	var node := Node.new()
+	node.free()
+	print(node as Node2D)
diff --git a/modules/gdscript/tests/scripts/runtime/errors/cast_freed_object.out b/modules/gdscript/tests/scripts/runtime/errors/cast_freed_object.out
new file mode 100644
index 0000000000..90d81dd9a1
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/errors/cast_freed_object.out
@@ -0,0 +1,6 @@
+GDTEST_RUNTIME_ERROR
+>> SCRIPT ERROR
+>> on function: test()
+>> runtime/errors/cast_freed_object.gd
+>> 4
+>> Trying to cast a freed object.
diff --git a/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_array.gd b/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_array.gd
new file mode 100644
index 0000000000..00817c588f
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_array.gd
@@ -0,0 +1,4 @@
+func test():
+	var integer: Variant = 1
+	@warning_ignore("unsafe_cast")
+	print(integer as Array)
diff --git a/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_array.out b/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_array.out
new file mode 100644
index 0000000000..545d7a4906
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_array.out
@@ -0,0 +1,6 @@
+GDTEST_RUNTIME_ERROR
+>> SCRIPT ERROR
+>> on function: test()
+>> runtime/errors/cast_int_to_array.gd
+>> 4
+>> Invalid cast: could not convert value to 'Array'.
diff --git a/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_object.gd b/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_object.gd
new file mode 100644
index 0000000000..44673a4513
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_object.gd
@@ -0,0 +1,4 @@
+func test():
+	var integer: Variant = 1
+	@warning_ignore("unsafe_cast")
+	print(integer as Node)
diff --git a/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_object.out b/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_object.out
new file mode 100644
index 0000000000..7c39b46396
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/errors/cast_int_to_object.out
@@ -0,0 +1,6 @@
+GDTEST_RUNTIME_ERROR
+>> SCRIPT ERROR
+>> on function: test()
+>> runtime/errors/cast_int_to_object.gd
+>> 4
+>> Invalid cast: can't convert a non-object value to an object type.
diff --git a/modules/gdscript/tests/scripts/runtime/errors/cast_object_to_int.gd b/modules/gdscript/tests/scripts/runtime/errors/cast_object_to_int.gd
new file mode 100644
index 0000000000..830d0c0c4a
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/errors/cast_object_to_int.gd
@@ -0,0 +1,4 @@
+func test():
+	var object: Variant = RefCounted.new()
+	@warning_ignore("unsafe_cast")
+	print(object as int)
diff --git a/modules/gdscript/tests/scripts/runtime/errors/cast_object_to_int.out b/modules/gdscript/tests/scripts/runtime/errors/cast_object_to_int.out
new file mode 100644
index 0000000000..f922199fb3
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/errors/cast_object_to_int.out
@@ -0,0 +1,6 @@
+GDTEST_RUNTIME_ERROR
+>> SCRIPT ERROR
+>> on function: test()
+>> runtime/errors/cast_object_to_int.gd
+>> 4
+>> Invalid cast: could not convert value to 'int'.
diff --git a/modules/gdscript/tests/scripts/runtime/features/await_signal_with_parameters.gd b/modules/gdscript/tests/scripts/runtime/features/await_signal_with_parameters.gd
new file mode 100644
index 0000000000..ff0001676d
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/features/await_signal_with_parameters.gd
@@ -0,0 +1,25 @@
+signal no_parameters()
+signal one_parameter(number)
+signal two_parameters(number1, number2)
+
+func await_no_parameters():
+	var result = await no_parameters
+	print(result)
+
+func await_one_parameter():
+	var result = await one_parameter
+	print(result)
+
+func await_two_parameters():
+	var result = await two_parameters
+	print(result)
+
+func test():
+	await_no_parameters()
+	no_parameters.emit()
+
+	await_one_parameter()
+	one_parameter.emit(1)
+
+	await_two_parameters()
+	two_parameters.emit(1, 2)
diff --git a/modules/gdscript/tests/scripts/runtime/features/await_signal_with_parameters.out b/modules/gdscript/tests/scripts/runtime/features/await_signal_with_parameters.out
new file mode 100644
index 0000000000..9e203c601c
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/features/await_signal_with_parameters.out
@@ -0,0 +1,4 @@
+GDTEST_OK
+<null>
+1
+[1, 2]
diff --git a/modules/gdscript/tests/scripts/runtime/features/reset_unassigned_variables_in_loops.gd b/modules/gdscript/tests/scripts/runtime/features/reset_unassigned_variables_in_loops.gd
index c45f8dce48..2bd5362f2a 100644
--- a/modules/gdscript/tests/scripts/runtime/features/reset_unassigned_variables_in_loops.gd
+++ b/modules/gdscript/tests/scripts/runtime/features/reset_unassigned_variables_in_loops.gd
@@ -7,6 +7,7 @@ func test():
 			var b
 			if true:
 				var c
+				@warning_ignore("unassigned_variable")
 				prints("Begin:", i, a, b, c)
 				a = 1
 				b = 1
@@ -20,6 +21,7 @@ func test():
 			var b
 			if true:
 				var c
+				@warning_ignore("unassigned_variable")
 				prints("Begin:", j, a, b, c)
 				a = 1
 				b = 1
diff --git a/modules/gdscript/tests/scripts/runtime/features/type_casting.gd b/modules/gdscript/tests/scripts/runtime/features/type_casting.gd
new file mode 100644
index 0000000000..c63ea16c32
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/features/type_casting.gd
@@ -0,0 +1,24 @@
+func print_value(value: Variant) -> void:
+	if value is Object:
+		@warning_ignore("unsafe_method_access")
+		print("<%s>" % value.get_class())
+	else:
+		print(var_to_str(value))
+
+func test():
+	var int_value := 1
+	print_value(int_value as Variant)
+	print_value(int_value as int)
+	print_value(int_value as float)
+
+	var node_value := Node.new()
+	print_value(node_value as Variant)
+	print_value(node_value as Object)
+	print_value(node_value as Node)
+	print_value(node_value as Node2D)
+	node_value.free()
+
+	var null_value = null
+	print_value(null_value as Variant)
+	@warning_ignore("unsafe_cast")
+	print_value(null_value as Node)
diff --git a/modules/gdscript/tests/scripts/runtime/features/type_casting.out b/modules/gdscript/tests/scripts/runtime/features/type_casting.out
new file mode 100644
index 0000000000..7da5a4c0a4
--- /dev/null
+++ b/modules/gdscript/tests/scripts/runtime/features/type_casting.out
@@ -0,0 +1,10 @@
+GDTEST_OK
+1
+1
+1.0
+<Node>
+<Node>
+<Node>
+null
+null
+null
diff --git a/modules/gltf/doc_classes/GLTFPhysicsBody.xml b/modules/gltf/doc_classes/GLTFPhysicsBody.xml
index 5cfc22f6b2..cd701e2f2f 100644
--- a/modules/gltf/doc_classes/GLTFPhysicsBody.xml
+++ b/modules/gltf/doc_classes/GLTFPhysicsBody.xml
@@ -22,7 +22,7 @@
 			<return type="GLTFPhysicsBody" />
 			<param index="0" name="body_node" type="CollisionObject3D" />
 			<description>
-				Create a new GLTFPhysicsBody instance from the given Godot [CollisionObject3D] node.
+				Creates a new GLTFPhysicsBody instance from the given Godot [CollisionObject3D] node.
 			</description>
 		</method>
 		<method name="to_dictionary" qualifiers="const">
diff --git a/modules/gltf/doc_classes/GLTFPhysicsShape.xml b/modules/gltf/doc_classes/GLTFPhysicsShape.xml
index c397c660d9..a4aaf9415c 100644
--- a/modules/gltf/doc_classes/GLTFPhysicsShape.xml
+++ b/modules/gltf/doc_classes/GLTFPhysicsShape.xml
@@ -23,7 +23,14 @@
 			<return type="GLTFPhysicsShape" />
 			<param index="0" name="shape_node" type="CollisionShape3D" />
 			<description>
-				Create a new GLTFPhysicsShape instance from the given Godot [CollisionShape3D] node.
+				Creates a new GLTFPhysicsShape instance from the given Godot [CollisionShape3D] node.
+			</description>
+		</method>
+		<method name="from_resource" qualifiers="static">
+			<return type="GLTFPhysicsShape" />
+			<param index="0" name="shape_resource" type="Shape3D" />
+			<description>
+				Creates a new GLTFPhysicsShape instance from the given Godot [Shape3D] resource.
 			</description>
 		</method>
 		<method name="to_dictionary" qualifiers="const">
@@ -39,6 +46,13 @@
 				Converts this GLTFPhysicsShape instance into a Godot [CollisionShape3D] node.
 			</description>
 		</method>
+		<method name="to_resource">
+			<return type="Shape3D" />
+			<param index="0" name="cache_shapes" type="bool" default="false" />
+			<description>
+				Converts this GLTFPhysicsShape instance into a Godot [Shape3D] resource.
+			</description>
+		</method>
 	</methods>
 	<members>
 		<member name="height" type="float" setter="set_height" getter="get_height" default="2.0">
diff --git a/modules/gltf/extensions/physics/gltf_document_extension_physics.cpp b/modules/gltf/extensions/physics/gltf_document_extension_physics.cpp
index c6e34cc023..352b439332 100644
--- a/modules/gltf/extensions/physics/gltf_document_extension_physics.cpp
+++ b/modules/gltf/extensions/physics/gltf_document_extension_physics.cpp
@@ -355,6 +355,7 @@ void GLTFDocumentExtensionPhysics::convert_scene_node(Ref<GLTFState> p_state, Re
 	if (cast_to<CollisionShape3D>(p_scene_node)) {
 		CollisionShape3D *godot_shape = Object::cast_to<CollisionShape3D>(p_scene_node);
 		Ref<GLTFPhysicsShape> gltf_shape = GLTFPhysicsShape::from_node(godot_shape);
+		ERR_FAIL_COND_MSG(gltf_shape.is_null(), "GLTF Physics: Could not convert CollisionShape3D to GLTFPhysicsShape. Does it have a valid Shape3D?");
 		{
 			Ref<ImporterMesh> importer_mesh = gltf_shape->get_importer_mesh();
 			if (importer_mesh.is_valid()) {
diff --git a/modules/gltf/extensions/physics/gltf_physics_shape.cpp b/modules/gltf/extensions/physics/gltf_physics_shape.cpp
index 6c9ed82a69..6897bdbd3a 100644
--- a/modules/gltf/extensions/physics/gltf_physics_shape.cpp
+++ b/modules/gltf/extensions/physics/gltf_physics_shape.cpp
@@ -46,6 +46,9 @@ void GLTFPhysicsShape::_bind_methods() {
 	ClassDB::bind_static_method("GLTFPhysicsShape", D_METHOD("from_node", "shape_node"), &GLTFPhysicsShape::from_node);
 	ClassDB::bind_method(D_METHOD("to_node", "cache_shapes"), &GLTFPhysicsShape::to_node, DEFVAL(false));
 
+	ClassDB::bind_static_method("GLTFPhysicsShape", D_METHOD("from_resource", "shape_resource"), &GLTFPhysicsShape::from_resource);
+	ClassDB::bind_method(D_METHOD("to_resource", "cache_shapes"), &GLTFPhysicsShape::to_resource, DEFVAL(false));
+
 	ClassDB::bind_static_method("GLTFPhysicsShape", D_METHOD("from_dictionary", "dictionary"), &GLTFPhysicsShape::from_dictionary);
 	ClassDB::bind_method(D_METHOD("to_dictionary"), &GLTFPhysicsShape::to_dictionary);
 
@@ -159,44 +162,57 @@ Ref<ImporterMesh> _convert_hull_points_to_mesh(const Vector<Vector3> &p_hull_poi
 
 Ref<GLTFPhysicsShape> GLTFPhysicsShape::from_node(const CollisionShape3D *p_godot_shape_node) {
 	Ref<GLTFPhysicsShape> gltf_shape;
-	gltf_shape.instantiate();
 	ERR_FAIL_NULL_V_MSG(p_godot_shape_node, gltf_shape, "Tried to create a GLTFPhysicsShape from a CollisionShape3D node, but the given node was null.");
+	Ref<Shape3D> shape_resource = p_godot_shape_node->get_shape();
+	ERR_FAIL_COND_V_MSG(shape_resource.is_null(), gltf_shape, "Tried to create a GLTFPhysicsShape from a CollisionShape3D node, but the given node had a null shape.");
+	gltf_shape = from_resource(shape_resource);
+	// Check if the shape is part of a trigger.
 	Node *parent = p_godot_shape_node->get_parent();
 	if (cast_to<const Area3D>(parent)) {
 		gltf_shape->set_is_trigger(true);
 	}
-	// All the code for working with the shape is below this comment.
-	Ref<Shape3D> shape_resource = p_godot_shape_node->get_shape();
-	ERR_FAIL_COND_V_MSG(shape_resource.is_null(), gltf_shape, "Tried to create a GLTFPhysicsShape from a CollisionShape3D node, but the given node had a null shape.");
-	gltf_shape->_shape_cache = shape_resource;
-	if (cast_to<BoxShape3D>(shape_resource.ptr())) {
+	return gltf_shape;
+}
+
+CollisionShape3D *GLTFPhysicsShape::to_node(bool p_cache_shapes) {
+	CollisionShape3D *godot_shape_node = memnew(CollisionShape3D);
+	to_resource(p_cache_shapes); // Sets `_shape_cache`.
+	godot_shape_node->set_shape(_shape_cache);
+	return godot_shape_node;
+}
+
+Ref<GLTFPhysicsShape> GLTFPhysicsShape::from_resource(const Ref<Shape3D> &p_shape_resource) {
+	Ref<GLTFPhysicsShape> gltf_shape;
+	gltf_shape.instantiate();
+	ERR_FAIL_COND_V_MSG(p_shape_resource.is_null(), gltf_shape, "Tried to create a GLTFPhysicsShape from a Shape3D resource, but the given resource was null.");
+	if (cast_to<BoxShape3D>(p_shape_resource.ptr())) {
 		gltf_shape->shape_type = "box";
-		Ref<BoxShape3D> box = shape_resource;
+		Ref<BoxShape3D> box = p_shape_resource;
 		gltf_shape->set_size(box->get_size());
-	} else if (cast_to<const CapsuleShape3D>(shape_resource.ptr())) {
+	} else if (cast_to<const CapsuleShape3D>(p_shape_resource.ptr())) {
 		gltf_shape->shape_type = "capsule";
-		Ref<CapsuleShape3D> capsule = shape_resource;
+		Ref<CapsuleShape3D> capsule = p_shape_resource;
 		gltf_shape->set_radius(capsule->get_radius());
 		gltf_shape->set_height(capsule->get_height());
-	} else if (cast_to<const CylinderShape3D>(shape_resource.ptr())) {
+	} else if (cast_to<const CylinderShape3D>(p_shape_resource.ptr())) {
 		gltf_shape->shape_type = "cylinder";
-		Ref<CylinderShape3D> cylinder = shape_resource;
+		Ref<CylinderShape3D> cylinder = p_shape_resource;
 		gltf_shape->set_radius(cylinder->get_radius());
 		gltf_shape->set_height(cylinder->get_height());
-	} else if (cast_to<const SphereShape3D>(shape_resource.ptr())) {
+	} else if (cast_to<const SphereShape3D>(p_shape_resource.ptr())) {
 		gltf_shape->shape_type = "sphere";
-		Ref<SphereShape3D> sphere = shape_resource;
+		Ref<SphereShape3D> sphere = p_shape_resource;
 		gltf_shape->set_radius(sphere->get_radius());
-	} else if (cast_to<const ConvexPolygonShape3D>(shape_resource.ptr())) {
+	} else if (cast_to<const ConvexPolygonShape3D>(p_shape_resource.ptr())) {
 		gltf_shape->shape_type = "convex";
-		Ref<ConvexPolygonShape3D> convex = shape_resource;
+		Ref<ConvexPolygonShape3D> convex = p_shape_resource;
 		Vector<Vector3> hull_points = convex->get_points();
 		Ref<ImporterMesh> importer_mesh = _convert_hull_points_to_mesh(hull_points);
 		ERR_FAIL_COND_V_MSG(importer_mesh.is_null(), gltf_shape, "GLTFPhysicsShape: Failed to convert convex hull points to a mesh.");
 		gltf_shape->set_importer_mesh(importer_mesh);
-	} else if (cast_to<const ConcavePolygonShape3D>(shape_resource.ptr())) {
+	} else if (cast_to<const ConcavePolygonShape3D>(p_shape_resource.ptr())) {
 		gltf_shape->shape_type = "trimesh";
-		Ref<ConcavePolygonShape3D> concave = shape_resource;
+		Ref<ConcavePolygonShape3D> concave = p_shape_resource;
 		Ref<ImporterMesh> importer_mesh;
 		importer_mesh.instantiate();
 		Array surface_array;
@@ -205,14 +221,14 @@ Ref<GLTFPhysicsShape> GLTFPhysicsShape::from_node(const CollisionShape3D *p_godo
 		importer_mesh->add_surface(Mesh::PRIMITIVE_TRIANGLES, surface_array);
 		gltf_shape->set_importer_mesh(importer_mesh);
 	} else {
-		ERR_PRINT("Tried to create a GLTFPhysicsShape from a CollisionShape3D node, but the given node's shape '" + String(Variant(shape_resource)) +
+		ERR_PRINT("Tried to create a GLTFPhysicsShape from a Shape3D, but the given shape '" + String(Variant(p_shape_resource)) +
 				"' had an unsupported shape type. Only BoxShape3D, CapsuleShape3D, CylinderShape3D, SphereShape3D, ConcavePolygonShape3D, and ConvexPolygonShape3D are supported.");
 	}
+	gltf_shape->_shape_cache = p_shape_resource;
 	return gltf_shape;
 }
 
-CollisionShape3D *GLTFPhysicsShape::to_node(bool p_cache_shapes) {
-	CollisionShape3D *godot_shape_node = memnew(CollisionShape3D);
+Ref<Shape3D> GLTFPhysicsShape::to_resource(bool p_cache_shapes) {
 	if (!p_cache_shapes || _shape_cache == nullptr) {
 		if (shape_type == "box") {
 			Ref<BoxShape3D> box;
@@ -237,19 +253,18 @@ CollisionShape3D *GLTFPhysicsShape::to_node(bool p_cache_shapes) {
 			sphere->set_radius(radius);
 			_shape_cache = sphere;
 		} else if (shape_type == "convex") {
-			ERR_FAIL_COND_V_MSG(importer_mesh.is_null(), godot_shape_node, "GLTFPhysicsShape: Error converting convex hull shape to a node: The mesh resource is null.");
+			ERR_FAIL_COND_V_MSG(importer_mesh.is_null(), _shape_cache, "GLTFPhysicsShape: Error converting convex hull shape to a shape resource: The mesh resource is null.");
 			Ref<ConvexPolygonShape3D> convex = importer_mesh->get_mesh()->create_convex_shape();
 			_shape_cache = convex;
 		} else if (shape_type == "trimesh") {
-			ERR_FAIL_COND_V_MSG(importer_mesh.is_null(), godot_shape_node, "GLTFPhysicsShape: Error converting concave mesh shape to a node: The mesh resource is null.");
+			ERR_FAIL_COND_V_MSG(importer_mesh.is_null(), _shape_cache, "GLTFPhysicsShape: Error converting concave mesh shape to a shape resource: The mesh resource is null.");
 			Ref<ConcavePolygonShape3D> concave = importer_mesh->create_trimesh_shape();
 			_shape_cache = concave;
 		} else {
-			ERR_PRINT("GLTFPhysicsShape: Error converting to a node: Shape type '" + shape_type + "' is unknown.");
+			ERR_PRINT("GLTFPhysicsShape: Error converting to a shape resource: Shape type '" + shape_type + "' is unknown.");
 		}
 	}
-	godot_shape_node->set_shape(_shape_cache);
-	return godot_shape_node;
+	return _shape_cache;
 }
 
 Ref<GLTFPhysicsShape> GLTFPhysicsShape::from_dictionary(const Dictionary p_dictionary) {
diff --git a/modules/gltf/extensions/physics/gltf_physics_shape.h b/modules/gltf/extensions/physics/gltf_physics_shape.h
index ec0a8931f1..a6974473ee 100644
--- a/modules/gltf/extensions/physics/gltf_physics_shape.h
+++ b/modules/gltf/extensions/physics/gltf_physics_shape.h
@@ -55,7 +55,7 @@ private:
 	bool is_trigger = false;
 	GLTFMeshIndex mesh_index = -1;
 	Ref<ImporterMesh> importer_mesh = nullptr;
-	// Internal only, for caching Godot shape resources. Used in `to_node`.
+	// Internal only, for caching Godot shape resources. Used in `to_resource` and `to_node`.
 	Ref<Shape3D> _shape_cache = nullptr;
 
 public:
@@ -83,6 +83,9 @@ public:
 	static Ref<GLTFPhysicsShape> from_node(const CollisionShape3D *p_shape_node);
 	CollisionShape3D *to_node(bool p_cache_shapes = false);
 
+	static Ref<GLTFPhysicsShape> from_resource(const Ref<Shape3D> &p_shape_resource);
+	Ref<Shape3D> to_resource(bool p_cache_shapes = false);
+
 	static Ref<GLTFPhysicsShape> from_dictionary(const Dictionary p_dictionary);
 	Dictionary to_dictionary() const;
 };
diff --git a/modules/gridmap/doc_classes/GridMap.xml b/modules/gridmap/doc_classes/GridMap.xml
index 7aeecef980..ef7276f493 100644
--- a/modules/gridmap/doc_classes/GridMap.xml
+++ b/modules/gridmap/doc_classes/GridMap.xml
@@ -12,8 +12,8 @@
 	</description>
 	<tutorials>
 		<link title="Using gridmaps">$DOCS_URL/tutorials/3d/using_gridmaps.html</link>
-		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/125</link>
-		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/126</link>
+		<link title="3D Platformer Demo">https://godotengine.org/asset-library/asset/2748</link>
+		<link title="3D Kinematic Character Demo">https://godotengine.org/asset-library/asset/2739</link>
 	</tutorials>
 	<methods>
 		<method name="clear">
diff --git a/modules/jsonrpc/SCsub b/modules/jsonrpc/SCsub
index fe5312670a..8ee4f8bfea 100644
--- a/modules/jsonrpc/SCsub
+++ b/modules/jsonrpc/SCsub
@@ -5,3 +5,10 @@ Import("env_modules")
 
 env_jsonrpc = env_modules.Clone()
 env_jsonrpc.add_source_files(env.modules_sources, "*.cpp")
+
+if env["tests"]:
+    env_jsonrpc.Append(CPPDEFINES=["TESTS_ENABLED"])
+    env_jsonrpc.add_source_files(env.modules_sources, "./tests/*.cpp")
+
+    if env["disable_exceptions"]:
+        env_jsonrpc.Append(CPPDEFINES=["DOCTEST_CONFIG_NO_EXCEPTIONS_BUT_WITH_ALL_ASSERTS"])
diff --git a/modules/jsonrpc/tests/test_jsonrpc.cpp b/modules/jsonrpc/tests/test_jsonrpc.cpp
new file mode 100644
index 0000000000..8500ff6da9
--- /dev/null
+++ b/modules/jsonrpc/tests/test_jsonrpc.cpp
@@ -0,0 +1,86 @@
+/**************************************************************************/
+/*  test_jsonrpc.cpp                                                      */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "test_jsonrpc.h"
+
+#include "core/io/json.h"
+
+namespace TestJSONRPC {
+
+void check_error_code(const Dictionary &p_dict, const JSONRPC::ErrorCode &p_code) {
+	CHECK(p_dict["jsonrpc"] == "2.0");
+	REQUIRE(p_dict.has("error"));
+	const Dictionary &err_body = p_dict["error"];
+	const int &code = err_body["code"];
+	CHECK(code == p_code);
+}
+
+void check_invalid(const Dictionary &p_dict) {
+	check_error_code(p_dict, JSONRPC::INVALID_REQUEST);
+}
+
+void check_invalid_string(const String &p_str) {
+	JSON json;
+	REQUIRE(json.parse(p_str) == OK);
+	const Dictionary &dict = json.get_data();
+	check_invalid(dict);
+}
+
+String TestClassJSONRPC::something(const String &p_in) {
+	return p_in + ", please";
+}
+
+void TestClassJSONRPC::_bind_methods() {
+	ClassDB::bind_method(D_METHOD("something", "in"), &TestClassJSONRPC::something);
+}
+
+void test_process_action(const Variant &p_in, const Variant &p_expected, bool p_process_array_elements) {
+	TestClassJSONRPC json_rpc = TestClassJSONRPC();
+	const Variant &observed = json_rpc.process_action(p_in, p_process_array_elements);
+	CHECK(observed == p_expected);
+}
+
+void test_process_string(const String &p_in, const String &p_expected) {
+	TestClassJSONRPC json_rpc = TestClassJSONRPC();
+	const String &out_str = json_rpc.process_string(p_in);
+	CHECK(out_str == p_expected);
+}
+
+void check_error_no_method(const Dictionary &p_dict) {
+	check_error_code(p_dict, JSONRPC::METHOD_NOT_FOUND);
+}
+
+void test_process_action_bad_method(const Dictionary &p_in) {
+	TestClassJSONRPC json_rpc = TestClassJSONRPC();
+	const Dictionary &out_dict = json_rpc.process_action(p_in);
+	check_error_no_method(out_dict);
+}
+
+} // namespace TestJSONRPC
diff --git a/modules/jsonrpc/tests/test_jsonrpc.h b/modules/jsonrpc/tests/test_jsonrpc.h
new file mode 100644
index 0000000000..5770581019
--- /dev/null
+++ b/modules/jsonrpc/tests/test_jsonrpc.h
@@ -0,0 +1,139 @@
+/**************************************************************************/
+/*  test_jsonrpc.h                                                        */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifndef TEST_JSONRPC_H
+#define TEST_JSONRPC_H
+
+#include "tests/test_macros.h"
+#include "tests/test_utils.h"
+
+#include "../jsonrpc.h"
+
+namespace TestJSONRPC {
+
+void check_invalid(const Dictionary &p_dict);
+
+TEST_CASE("[JSONRPC] process_action invalid") {
+	JSONRPC json_rpc = JSONRPC();
+
+	check_invalid(json_rpc.process_action("String is invalid"));
+	check_invalid(json_rpc.process_action(1234));
+	check_invalid(json_rpc.process_action(false));
+	check_invalid(json_rpc.process_action(3.14159));
+}
+
+void check_invalid_string(const String &p_str);
+
+TEST_CASE("[JSONRPC] process_string invalid") {
+	JSONRPC json_rpc = JSONRPC();
+
+	check_invalid_string(json_rpc.process_string("\"String is invalid\""));
+	check_invalid_string(json_rpc.process_string("1234"));
+	check_invalid_string(json_rpc.process_string("false"));
+	check_invalid_string(json_rpc.process_string("3.14159"));
+}
+
+class TestClassJSONRPC : public JSONRPC {
+	GDCLASS(TestClassJSONRPC, JSONRPC)
+
+public:
+	String something(const String &p_in);
+
+protected:
+	static void _bind_methods();
+};
+
+void test_process_action(const Variant &p_in, const Variant &p_expected, bool p_process_array_elements = false);
+
+TEST_CASE("[JSONRPC] process_action Dictionary") {
+	ClassDB::register_class<TestClassJSONRPC>();
+
+	Dictionary in_dict = Dictionary();
+	in_dict["method"] = "something";
+	in_dict["id"] = "ID";
+	in_dict["params"] = "yes";
+
+	Dictionary expected_dict = Dictionary();
+	expected_dict["jsonrpc"] = "2.0";
+	expected_dict["id"] = "ID";
+	expected_dict["result"] = "yes, please";
+
+	test_process_action(in_dict, expected_dict);
+}
+
+TEST_CASE("[JSONRPC] process_action Array") {
+	Array in;
+	Dictionary in_1;
+	in_1["method"] = "something";
+	in_1["id"] = 1;
+	in_1["params"] = "more";
+	in.push_back(in_1);
+	Dictionary in_2;
+	in_2["method"] = "something";
+	in_2["id"] = 2;
+	in_2["params"] = "yes";
+	in.push_back(in_2);
+
+	Array expected;
+	Dictionary expected_1;
+	expected_1["jsonrpc"] = "2.0";
+	expected_1["id"] = 1;
+	expected_1["result"] = "more, please";
+	expected.push_back(expected_1);
+	Dictionary expected_2;
+	expected_2["jsonrpc"] = "2.0";
+	expected_2["id"] = 2;
+	expected_2["result"] = "yes, please";
+	expected.push_back(expected_2);
+
+	test_process_action(in, expected, true);
+}
+
+void test_process_string(const String &p_in, const String &p_expected);
+
+TEST_CASE("[JSONRPC] process_string Dictionary") {
+	const String in = "{\"method\":\"something\",\"id\":\"ID\",\"params\":\"yes\"}";
+	const String expected = "{\"id\":\"ID\",\"jsonrpc\":\"2.0\",\"result\":\"yes, please\"}";
+
+	test_process_string(in, expected);
+}
+
+void test_process_action_bad_method(const Dictionary &p_in);
+
+TEST_CASE("[JSONRPC] process_action bad method") {
+	Dictionary in_dict;
+	in_dict["method"] = "nothing";
+
+	test_process_action_bad_method(in_dict);
+}
+
+} // namespace TestJSONRPC
+
+#endif // TEST_JSONRPC_H
diff --git a/modules/ktx/SCsub b/modules/ktx/SCsub
index acdb829979..c4cb732498 100644
--- a/modules/ktx/SCsub
+++ b/modules/ktx/SCsub
@@ -22,6 +22,7 @@ thirdparty_sources = [
     "lib/texture1.c",
     "lib/texture2.c",
     "lib/vkformat_check.c",
+    "lib/vkformat_typesize.c",
     "lib/dfdutils/createdfd.c",
     "lib/dfdutils/colourspaces.c",
     "lib/dfdutils/interpretdfd.c",
diff --git a/modules/mono/csharp_script.cpp b/modules/mono/csharp_script.cpp
index 0dd1dc7c12..a3464ccfc2 100644
--- a/modules/mono/csharp_script.cpp
+++ b/modules/mono/csharp_script.cpp
@@ -410,115 +410,6 @@ ScriptLanguage::ScriptNameCasing CSharpLanguage::preferred_file_name_casing() co
 }
 
 #ifdef TOOLS_ENABLED
-struct VariantCsName {
-	Variant::Type variant_type;
-	const String cs_type;
-};
-
-static String variant_type_to_managed_name(const String &p_var_type_name) {
-	if (p_var_type_name.is_empty()) {
-		return "Variant";
-	}
-
-	if (ClassDB::class_exists(p_var_type_name)) {
-		return pascal_to_pascal_case(p_var_type_name);
-	}
-
-	if (p_var_type_name == Variant::get_type_name(Variant::OBJECT)) {
-		return "GodotObject";
-	}
-
-	if (p_var_type_name == Variant::get_type_name(Variant::INT)) {
-		return "long";
-	}
-
-	if (p_var_type_name == Variant::get_type_name(Variant::FLOAT)) {
-		return "double";
-	}
-
-	if (p_var_type_name == Variant::get_type_name(Variant::STRING)) {
-		return "string"; // I prefer this one >:[
-	}
-
-	if (p_var_type_name == Variant::get_type_name(Variant::DICTIONARY)) {
-		return "Collections.Dictionary";
-	}
-
-	if (p_var_type_name.begins_with(Variant::get_type_name(Variant::ARRAY) + "[")) {
-		String element_type = p_var_type_name.trim_prefix(Variant::get_type_name(Variant::ARRAY) + "[").trim_suffix("]");
-		return "Collections.Array<" + variant_type_to_managed_name(element_type) + ">";
-	}
-
-	if (p_var_type_name == Variant::get_type_name(Variant::ARRAY)) {
-		return "Collections.Array";
-	}
-
-	if (p_var_type_name == Variant::get_type_name(Variant::PACKED_BYTE_ARRAY)) {
-		return "byte[]";
-	}
-	if (p_var_type_name == Variant::get_type_name(Variant::PACKED_INT32_ARRAY)) {
-		return "int[]";
-	}
-	if (p_var_type_name == Variant::get_type_name(Variant::PACKED_INT64_ARRAY)) {
-		return "long[]";
-	}
-	if (p_var_type_name == Variant::get_type_name(Variant::PACKED_FLOAT32_ARRAY)) {
-		return "float[]";
-	}
-	if (p_var_type_name == Variant::get_type_name(Variant::PACKED_FLOAT64_ARRAY)) {
-		return "double[]";
-	}
-	if (p_var_type_name == Variant::get_type_name(Variant::PACKED_STRING_ARRAY)) {
-		return "string[]";
-	}
-	if (p_var_type_name == Variant::get_type_name(Variant::PACKED_VECTOR2_ARRAY)) {
-		return "Vector2[]";
-	}
-	if (p_var_type_name == Variant::get_type_name(Variant::PACKED_VECTOR3_ARRAY)) {
-		return "Vector3[]";
-	}
-	if (p_var_type_name == Variant::get_type_name(Variant::PACKED_COLOR_ARRAY)) {
-		return "Color[]";
-	}
-
-	if (p_var_type_name == Variant::get_type_name(Variant::SIGNAL)) {
-		return "Signal";
-	}
-
-	const VariantCsName var_types[] = {
-		{ Variant::BOOL, "bool" },
-		{ Variant::INT, "long" },
-		{ Variant::VECTOR2, "Vector2" },
-		{ Variant::VECTOR2I, "Vector2I" },
-		{ Variant::RECT2, "Rect2" },
-		{ Variant::RECT2I, "Rect2I" },
-		{ Variant::VECTOR3, "Vector3" },
-		{ Variant::VECTOR3I, "Vector3I" },
-		{ Variant::TRANSFORM2D, "Transform2D" },
-		{ Variant::VECTOR4, "Vector4" },
-		{ Variant::VECTOR4I, "Vector4I" },
-		{ Variant::PLANE, "Plane" },
-		{ Variant::QUATERNION, "Quaternion" },
-		{ Variant::AABB, "Aabb" },
-		{ Variant::BASIS, "Basis" },
-		{ Variant::TRANSFORM3D, "Transform3D" },
-		{ Variant::PROJECTION, "Projection" },
-		{ Variant::COLOR, "Color" },
-		{ Variant::STRING_NAME, "StringName" },
-		{ Variant::NODE_PATH, "NodePath" },
-		{ Variant::RID, "Rid" },
-		{ Variant::CALLABLE, "Callable" },
-	};
-
-	for (unsigned int i = 0; i < sizeof(var_types) / sizeof(VariantCsName); i++) {
-		if (p_var_type_name == Variant::get_type_name(var_types[i].variant_type)) {
-			return var_types[i].cs_type;
-		}
-	}
-
-	return "Variant";
-}
-
 String CSharpLanguage::make_function(const String &, const String &p_name, const PackedStringArray &p_args) const {
 	// The make_function() API does not work for C# scripts.
 	// It will always append the generated function at the very end of the script. In C#, it will break compilation by
diff --git a/modules/mono/editor/GodotTools/GodotTools/Export/ExportPlugin.cs b/modules/mono/editor/GodotTools/GodotTools/Export/ExportPlugin.cs
index 46020fda89..d3720dcb72 100644
--- a/modules/mono/editor/GodotTools/GodotTools/Export/ExportPlugin.cs
+++ b/modules/mono/editor/GodotTools/GodotTools/Export/ExportPlugin.cs
@@ -90,6 +90,13 @@ namespace GodotTools.Export
                     $"Resource of type {Internal.CSharpLanguageType} has an invalid file extension: {path}",
                     nameof(path));
 
+            if (!ProjectContainsDotNet())
+            {
+                _maybeLastExportError = $"This project contains C# files but no solution file was found at the following path: {GodotSharpDirs.ProjectSlnPath}\n" +
+                    "A solution file is required for projects with C# files. Please ensure that the solution file exists in the specified location and try again.";
+                throw new InvalidOperationException($"{path} is a C# file but no solution file exists.");
+            }
+
             // TODO: What if the source file is not part of the game's C# project?
 
             bool includeScriptsContent = (bool)GetOption("dotnet/include_scripts_content");
@@ -202,7 +209,7 @@ namespace GodotTools.Export
 
             List<string> outputPaths = new();
 
-            bool embedBuildResults = (bool)GetOption("dotnet/embed_build_outputs") || platform == OS.Platforms.Android;
+            bool embedBuildResults = ((bool)GetOption("dotnet/embed_build_outputs") || platform == OS.Platforms.Android) && platform != OS.Platforms.MacOS;
 
             foreach (PublishConfig config in targets)
             {
diff --git a/modules/mono/glue/GodotSharp/GodotSharp/Core/Array.cs b/modules/mono/glue/GodotSharp/GodotSharp/Core/Array.cs
index 9b5aec7031..fb1d32c0cb 100644
--- a/modules/mono/glue/GodotSharp/GodotSharp/Core/Array.cs
+++ b/modules/mono/glue/GodotSharp/GodotSharp/Core/Array.cs
@@ -5,6 +5,7 @@ using System.Diagnostics.CodeAnalysis;
 using System.Linq;
 using System.Runtime.CompilerServices;
 using Godot.NativeInterop;
+using System.Diagnostics;
 
 #nullable enable
 
@@ -16,6 +17,8 @@ namespace Godot.Collections
     /// interfacing with the engine. Otherwise prefer .NET collections
     /// such as <see cref="System.Array"/> or <see cref="List{T}"/>.
     /// </summary>
+    [DebuggerTypeProxy(typeof(ArrayDebugView<Variant>))]
+    [DebuggerDisplay("Count = {Count}")]
 #pragma warning disable CA1710 // Identifiers should have correct suffix
     public sealed class Array :
 #pragma warning restore CA1710
@@ -1040,6 +1043,8 @@ namespace Godot.Collections
     /// such as arrays or <see cref="List{T}"/>.
     /// </summary>
     /// <typeparam name="T">The type of the array.</typeparam>
+    [DebuggerTypeProxy(typeof(ArrayDebugView<>))]
+    [DebuggerDisplay("Count = {Count}")]
     [SuppressMessage("ReSharper", "RedundantExtendsListEntry")]
     [SuppressMessage("Naming", "CA1710", MessageId = "Identifiers should have correct suffix")]
     public sealed class Array<[MustBeVariant] T> :
diff --git a/modules/mono/glue/GodotSharp/GodotSharp/Core/DebugView.cs b/modules/mono/glue/GodotSharp/GodotSharp/Core/DebugView.cs
new file mode 100644
index 0000000000..6d6eb61f8f
--- /dev/null
+++ b/modules/mono/glue/GodotSharp/GodotSharp/Core/DebugView.cs
@@ -0,0 +1,73 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+namespace Godot.Collections
+{
+    internal sealed class ArrayDebugView<T>
+    {
+        private readonly IList<T> _array;
+
+        public ArrayDebugView(IList<T> array)
+        {
+            ArgumentNullException.ThrowIfNull(array);
+
+            _array = array;
+        }
+
+        [DebuggerBrowsable(DebuggerBrowsableState.RootHidden)]
+        public T[] Items
+        {
+            get
+            {
+                var items = new T[_array.Count];
+                _array.CopyTo(items, 0);
+                return items;
+            }
+        }
+    }
+
+    internal sealed class DictionaryDebugView<TKey, TValue>
+    {
+        private readonly IDictionary<TKey, TValue> _dictionary;
+
+        public DictionaryDebugView(IDictionary<TKey, TValue> dictionary)
+        {
+            ArgumentNullException.ThrowIfNull(dictionary);
+
+            _dictionary = dictionary;
+        }
+
+        [DebuggerBrowsable(DebuggerBrowsableState.RootHidden)]
+        public DictionaryKeyItemDebugView<TKey, TValue>[] Items
+        {
+            get
+            {
+                var items = new KeyValuePair<TKey, TValue>[_dictionary.Count];
+                var views = new DictionaryKeyItemDebugView<TKey, TValue>[_dictionary.Count];
+                _dictionary.CopyTo(items, 0);
+                for (int i = 0; i < items.Length; i++)
+                {
+                    views[i] = new DictionaryKeyItemDebugView<TKey, TValue>(items[i]);
+                }
+                return views;
+            }
+        }
+    }
+
+    [DebuggerDisplay("{Value}", Name = "[{Key}]")]
+    internal readonly struct DictionaryKeyItemDebugView<TKey, TValue>
+    {
+        public DictionaryKeyItemDebugView(KeyValuePair<TKey, TValue> keyValue)
+        {
+            Key = keyValue.Key;
+            Value = keyValue.Value;
+        }
+
+        [DebuggerBrowsable(DebuggerBrowsableState.Collapsed)]
+        public TKey Key { get; }
+
+        [DebuggerBrowsable(DebuggerBrowsableState.Collapsed)]
+        public TValue Value { get; }
+    }
+}
diff --git a/modules/mono/glue/GodotSharp/GodotSharp/Core/Dictionary.cs b/modules/mono/glue/GodotSharp/GodotSharp/Core/Dictionary.cs
index d08fad90db..864815866a 100644
--- a/modules/mono/glue/GodotSharp/GodotSharp/Core/Dictionary.cs
+++ b/modules/mono/glue/GodotSharp/GodotSharp/Core/Dictionary.cs
@@ -4,6 +4,7 @@ using System.Collections;
 using System.Diagnostics.CodeAnalysis;
 using System.Runtime.CompilerServices;
 using Godot.NativeInterop;
+using System.Diagnostics;
 
 #nullable enable
 
@@ -14,6 +15,8 @@ namespace Godot.Collections
     /// typed elements allocated in the engine in C++. Useful when
     /// interfacing with the engine.
     /// </summary>
+    [DebuggerTypeProxy(typeof(DictionaryDebugView<Variant, Variant>))]
+    [DebuggerDisplay("Count = {Count}")]
     public sealed class Dictionary :
         IDictionary<Variant, Variant>,
         IReadOnlyDictionary<Variant, Variant>,
@@ -480,6 +483,8 @@ namespace Godot.Collections
     /// </summary>
     /// <typeparam name="TKey">The type of the dictionary's keys.</typeparam>
     /// <typeparam name="TValue">The type of the dictionary's values.</typeparam>
+    [DebuggerTypeProxy(typeof(DictionaryDebugView<,>))]
+    [DebuggerDisplay("Count = {Count}")]
     public class Dictionary<[MustBeVariant] TKey, [MustBeVariant] TValue> :
         IDictionary<TKey, TValue>,
         IReadOnlyDictionary<TKey, TValue>,
diff --git a/modules/mono/glue/GodotSharp/GodotSharp/GodotSharp.csproj b/modules/mono/glue/GodotSharp/GodotSharp/GodotSharp.csproj
index d54942e654..d4c11da963 100644
--- a/modules/mono/glue/GodotSharp/GodotSharp/GodotSharp.csproj
+++ b/modules/mono/glue/GodotSharp/GodotSharp/GodotSharp.csproj
@@ -77,6 +77,7 @@
     <Compile Include="Core\Color.cs" />
     <Compile Include="Core\Colors.cs" />
     <Compile Include="Core\DebuggingUtils.cs" />
+    <Compile Include="Core\DebugView.cs" />
     <Compile Include="Core\DelegateUtils.cs" />
     <Compile Include="Core\Dictionary.cs" />
     <Compile Include="Core\Dispatcher.cs" />
diff --git a/modules/mono/mono_gd/gd_mono.h b/modules/mono/mono_gd/gd_mono.h
index 1b46a619ca..0cb087db57 100644
--- a/modules/mono/mono_gd/gd_mono.h
+++ b/modules/mono/mono_gd/gd_mono.h
@@ -77,7 +77,9 @@ class GDMono {
 	void _try_load_project_assembly();
 #endif
 
+#ifdef DEBUG_METHODS_ENABLED
 	uint64_t api_core_hash = 0;
+#endif
 #ifdef TOOLS_ENABLED
 	uint64_t api_editor_hash = 0;
 #endif
diff --git a/modules/multiplayer/multiplayer_spawner.cpp b/modules/multiplayer/multiplayer_spawner.cpp
index 264a2e9c8e..6c0669b2f3 100644
--- a/modules/multiplayer/multiplayer_spawner.cpp
+++ b/modules/multiplayer/multiplayer_spawner.cpp
@@ -251,6 +251,7 @@ NodePath MultiplayerSpawner::get_spawn_path() const {
 void MultiplayerSpawner::set_spawn_path(const NodePath &p_path) {
 	spawn_path = p_path;
 	_update_spawn_node();
+	update_configuration_warnings();
 }
 
 void MultiplayerSpawner::_track(Node *p_node, const Variant &p_argument, int p_scene_id) {
diff --git a/modules/multiplayer/multiplayer_synchronizer.cpp b/modules/multiplayer/multiplayer_synchronizer.cpp
index 02e3a11964..c2ce500af8 100644
--- a/modules/multiplayer/multiplayer_synchronizer.cpp
+++ b/modules/multiplayer/multiplayer_synchronizer.cpp
@@ -354,6 +354,7 @@ void MultiplayerSynchronizer::set_root_path(const NodePath &p_path) {
 	_stop();
 	root_path = p_path;
 	_start();
+	update_configuration_warnings();
 }
 
 NodePath MultiplayerSynchronizer::get_root_path() const {
diff --git a/modules/openxr/doc_classes/OpenXRCompositionLayer.xml b/modules/openxr/doc_classes/OpenXRCompositionLayer.xml
index 66baaf096d..168e0bf077 100644
--- a/modules/openxr/doc_classes/OpenXRCompositionLayer.xml
+++ b/modules/openxr/doc_classes/OpenXRCompositionLayer.xml
@@ -10,6 +10,15 @@
 	<tutorials>
 	</tutorials>
 	<methods>
+		<method name="intersects_ray" qualifiers="const">
+			<return type="Vector2" />
+			<param index="0" name="origin" type="Vector3" />
+			<param index="1" name="direction" type="Vector3" />
+			<description>
+				Returns UV coordinates where the given ray intersects with the composition layer. [param origin] and [param direction] must be in global space.
+				Returns [code]Vector2(-1.0, -1.0)[/code] if the ray doesn't intersect.
+			</description>
+		</method>
 		<method name="is_natively_supported" qualifiers="const">
 			<return type="bool" />
 			<description>
diff --git a/modules/openxr/doc_classes/OpenXRExtensionWrapperExtension.xml b/modules/openxr/doc_classes/OpenXRExtensionWrapperExtension.xml
index 9d6b197ee1..79aa547c52 100644
--- a/modules/openxr/doc_classes/OpenXRExtensionWrapperExtension.xml
+++ b/modules/openxr/doc_classes/OpenXRExtensionWrapperExtension.xml
@@ -46,6 +46,18 @@
 				Returns a [PackedStringArray] of positional tracker names that are used within the extension wrapper.
 			</description>
 		</method>
+		<method name="_get_viewport_composition_layer_extension_properties" qualifiers="virtual">
+			<return type="Dictionary[]" />
+			<description>
+				Gets an array of [Dictionary]s that represent properties, just like [method Object._get_property_list], that will be added to [OpenXRCompositionLayer] nodes.
+			</description>
+		</method>
+		<method name="_get_viewport_composition_layer_extension_property_defaults" qualifiers="virtual">
+			<return type="Dictionary" />
+			<description>
+				Gets a [Dictionary] containing the default values for the properties returned by [method _get_viewport_composition_layer_extension_properties].
+			</description>
+		</method>
 		<method name="_on_before_instance_created" qualifiers="virtual">
 			<return type="void" />
 			<description>
@@ -152,6 +164,14 @@
 				Called when the OpenXR session state is changed to visible. This means OpenXR is now ready to receive frames.
 			</description>
 		</method>
+		<method name="_on_viewport_composition_layer_destroyed" qualifiers="virtual">
+			<return type="void" />
+			<param index="0" name="layer" type="const void*" />
+			<description>
+				Called when a composition layer created via [OpenXRCompositionLayer] is destroyed.
+				[param layer] is a pointer to an [code]XrCompositionLayerBaseHeader[/code] struct.
+			</description>
+		</method>
 		<method name="_set_hand_joint_locations_and_get_next_pointer" qualifiers="virtual">
 			<return type="int" />
 			<param index="0" name="hand_index" type="int" />
@@ -188,6 +208,17 @@
 				Adds additional data structures when interogating OpenXR system abilities.
 			</description>
 		</method>
+		<method name="_set_viewport_composition_layer_and_get_next_pointer" qualifiers="virtual">
+			<return type="int" />
+			<param index="0" name="layer" type="const void*" />
+			<param index="1" name="property_values" type="Dictionary" />
+			<param index="2" name="next_pointer" type="void*" />
+			<description>
+				Adds additional data structures to composition layers created by [OpenXRCompositionLayer].
+				[param property_values] contains the values of the properties returned by [method _get_viewport_composition_layer_extension_properties].
+				[param layer] is a pointer to an [code]XrCompositionLayerBaseHeader[/code] struct.
+			</description>
+		</method>
 		<method name="get_openxr_api">
 			<return type="OpenXRAPIExtension" />
 			<description>
diff --git a/modules/openxr/doc_classes/OpenXRInterface.xml b/modules/openxr/doc_classes/OpenXRInterface.xml
index 5d38788dcd..1136ac1b69 100644
--- a/modules/openxr/doc_classes/OpenXRInterface.xml
+++ b/modules/openxr/doc_classes/OpenXRInterface.xml
@@ -152,6 +152,13 @@
 				Informs the user queued a recenter of the player position.
 			</description>
 		</signal>
+		<signal name="refresh_rate_changed">
+			<param index="0" name="refresh_rate" type="float" />
+			<description>
+				Informs the user the HMD refresh rate has changed.
+				[b]Node:[/b] Only emitted if XR runtime supports the refresh rate extension.
+			</description>
+		</signal>
 		<signal name="session_begun">
 			<description>
 				Informs our OpenXR session has been started.
diff --git a/modules/openxr/extensions/openxr_composition_layer_extension.cpp b/modules/openxr/extensions/openxr_composition_layer_extension.cpp
index 7c0c69d747..1fba8e5f8b 100644
--- a/modules/openxr/extensions/openxr_composition_layer_extension.cpp
+++ b/modules/openxr/extensions/openxr_composition_layer_extension.cpp
@@ -86,11 +86,11 @@ int OpenXRCompositionLayerExtension::get_composition_layer_order(int p_index) {
 	return composition_layers[p_index]->get_sort_order();
 }
 
-void OpenXRCompositionLayerExtension::register_composition_layer_provider(OpenXRViewportCompositionLayerProvider *p_composition_layer) {
+void OpenXRCompositionLayerExtension::register_viewport_composition_layer_provider(OpenXRViewportCompositionLayerProvider *p_composition_layer) {
 	composition_layers.push_back(p_composition_layer);
 }
 
-void OpenXRCompositionLayerExtension::unregister_composition_layer_provider(OpenXRViewportCompositionLayerProvider *p_composition_layer) {
+void OpenXRCompositionLayerExtension::unregister_viewport_composition_layer_provider(OpenXRViewportCompositionLayerProvider *p_composition_layer) {
 	composition_layers.erase(p_composition_layer);
 }
 
@@ -123,6 +123,10 @@ OpenXRViewportCompositionLayerProvider::OpenXRViewportCompositionLayerProvider(X
 }
 
 OpenXRViewportCompositionLayerProvider::~OpenXRViewportCompositionLayerProvider() {
+	for (OpenXRExtensionWrapper *extension : OpenXRAPI::get_registered_extension_wrappers()) {
+		extension->on_viewport_composition_layer_destroyed(composition_layer);
+	}
+
 	// This will reset the viewport and free the swapchain too.
 	set_viewport(RID(), Size2i());
 }
@@ -159,6 +163,11 @@ void OpenXRViewportCompositionLayerProvider::set_viewport(RID p_viewport, Size2i
 	}
 }
 
+void OpenXRViewportCompositionLayerProvider::set_extension_property_values(const Dictionary &p_extension_property_values) {
+	extension_property_values = p_extension_property_values;
+	extension_property_values_changed = true;
+}
+
 void OpenXRViewportCompositionLayerProvider::on_pre_render() {
 	RenderingServer *rs = RenderingServer::get_singleton();
 	ERR_FAIL_NULL(rs);
@@ -187,20 +196,20 @@ XrCompositionLayerBaseHeader *OpenXRViewportCompositionLayerProvider::get_compos
 		return nullptr;
 	}
 
-	if (swapchain_info.swapchain == XR_NULL_HANDLE) {
+	if (swapchain_info.get_swapchain() == XR_NULL_HANDLE) {
 		// Don't have a swapchain to display? Ignore our layer.
 		return nullptr;
 	}
 
-	if (swapchain_info.image_acquired) {
-		openxr_api->release_image(swapchain_info);
+	if (swapchain_info.is_image_acquired()) {
+		swapchain_info.release();
 	}
 
 	// Update the layer struct for the swapchain.
 	switch (composition_layer->type) {
 		case XR_TYPE_COMPOSITION_LAYER_QUAD: {
 			XrCompositionLayerQuad *quad_layer = (XrCompositionLayerQuad *)composition_layer;
-			quad_layer->subImage.swapchain = swapchain_info.swapchain;
+			quad_layer->subImage.swapchain = swapchain_info.get_swapchain();
 			quad_layer->subImage.imageArrayIndex = 0;
 			quad_layer->subImage.imageRect.offset.x = 0;
 			quad_layer->subImage.imageRect.offset.y = 0;
@@ -210,7 +219,7 @@ XrCompositionLayerBaseHeader *OpenXRViewportCompositionLayerProvider::get_compos
 
 		case XR_TYPE_COMPOSITION_LAYER_CYLINDER_KHR: {
 			XrCompositionLayerCylinderKHR *cylinder_layer = (XrCompositionLayerCylinderKHR *)composition_layer;
-			cylinder_layer->subImage.swapchain = swapchain_info.swapchain;
+			cylinder_layer->subImage.swapchain = swapchain_info.get_swapchain();
 			cylinder_layer->subImage.imageArrayIndex = 0;
 			cylinder_layer->subImage.imageRect.offset.x = 0;
 			cylinder_layer->subImage.imageRect.offset.y = 0;
@@ -220,7 +229,7 @@ XrCompositionLayerBaseHeader *OpenXRViewportCompositionLayerProvider::get_compos
 
 		case XR_TYPE_COMPOSITION_LAYER_EQUIRECT2_KHR: {
 			XrCompositionLayerEquirect2KHR *equirect_layer = (XrCompositionLayerEquirect2KHR *)composition_layer;
-			equirect_layer->subImage.swapchain = swapchain_info.swapchain;
+			equirect_layer->subImage.swapchain = swapchain_info.get_swapchain();
 			equirect_layer->subImage.imageArrayIndex = 0;
 			equirect_layer->subImage.imageRect.offset.x = 0;
 			equirect_layer->subImage.imageRect.offset.y = 0;
@@ -233,6 +242,19 @@ XrCompositionLayerBaseHeader *OpenXRViewportCompositionLayerProvider::get_compos
 		} break;
 	}
 
+	if (extension_property_values_changed) {
+		extension_property_values_changed = false;
+
+		void *next_pointer = nullptr;
+		for (OpenXRExtensionWrapper *extension : OpenXRAPI::get_registered_extension_wrappers()) {
+			void *np = extension->set_viewport_composition_layer_and_get_next_pointer(composition_layer, extension_property_values, next_pointer);
+			if (np) {
+				next_pointer = np;
+			}
+		}
+		composition_layer->next = next_pointer;
+	}
+
 	return composition_layer;
 }
 
@@ -247,14 +269,16 @@ bool OpenXRViewportCompositionLayerProvider::update_and_acquire_swapchain(bool p
 	}
 
 	// See if our current swapchain is outdated.
-	if (swapchain_info.swapchain != XR_NULL_HANDLE) {
+	if (swapchain_info.get_swapchain() != XR_NULL_HANDLE) {
 		// If this swap chain, or the previous one, were static, then we can't reuse it.
 		if (swapchain_size == viewport_size && !p_static_image && !static_image) {
 			// We're all good! Just acquire it.
-			return openxr_api->acquire_image(swapchain_info);
+			// We can ignore should_render here, return will be false.
+			XrBool32 should_render = true;
+			return swapchain_info.acquire(should_render);
 		}
 
-		openxr_api->free_swapchain(swapchain_info);
+		swapchain_info.queue_free();
 	}
 
 	// Create our new swap chain
@@ -265,13 +289,15 @@ bool OpenXRViewportCompositionLayerProvider::update_and_acquire_swapchain(bool p
 	if (p_static_image) {
 		create_flags |= XR_SWAPCHAIN_CREATE_STATIC_IMAGE_BIT;
 	}
-	if (!openxr_api->create_swapchain(create_flags, XR_SWAPCHAIN_USAGE_SAMPLED_BIT | XR_SWAPCHAIN_USAGE_COLOR_ATTACHMENT_BIT | XR_SWAPCHAIN_USAGE_MUTABLE_FORMAT_BIT, swapchain_format, viewport_size.width, viewport_size.height, sample_count, array_size, swapchain_info.swapchain, &swapchain_info.swapchain_graphics_data)) {
+	if (!swapchain_info.create(create_flags, XR_SWAPCHAIN_USAGE_SAMPLED_BIT | XR_SWAPCHAIN_USAGE_COLOR_ATTACHMENT_BIT | XR_SWAPCHAIN_USAGE_MUTABLE_FORMAT_BIT, swapchain_format, viewport_size.width, viewport_size.height, sample_count, array_size)) {
 		swapchain_size = Size2i();
 		return false;
 	}
 
-	// Acquire our image so we can start rendering into it
-	bool ret = openxr_api->acquire_image(swapchain_info);
+	// Acquire our image so we can start rendering into it,
+	// we can ignore should_render here, ret will be false.
+	XrBool32 should_render = true;
+	bool ret = swapchain_info.acquire(should_render);
 
 	swapchain_size = viewport_size;
 	static_image = p_static_image;
@@ -279,8 +305,8 @@ bool OpenXRViewportCompositionLayerProvider::update_and_acquire_swapchain(bool p
 }
 
 void OpenXRViewportCompositionLayerProvider::free_swapchain() {
-	if (swapchain_info.swapchain != XR_NULL_HANDLE) {
-		openxr_api->free_swapchain(swapchain_info);
+	if (swapchain_info.get_swapchain() != XR_NULL_HANDLE) {
+		swapchain_info.queue_free();
 	}
 
 	swapchain_size = Size2i();
@@ -292,5 +318,5 @@ RID OpenXRViewportCompositionLayerProvider::get_current_swapchain_texture() {
 		return RID();
 	}
 
-	return openxr_api->get_image(swapchain_info);
+	return swapchain_info.get_image();
 }
diff --git a/modules/openxr/extensions/openxr_composition_layer_extension.h b/modules/openxr/extensions/openxr_composition_layer_extension.h
index 7cc35005f8..4fefc416e6 100644
--- a/modules/openxr/extensions/openxr_composition_layer_extension.h
+++ b/modules/openxr/extensions/openxr_composition_layer_extension.h
@@ -57,8 +57,8 @@ public:
 	virtual XrCompositionLayerBaseHeader *get_composition_layer(int p_index) override;
 	virtual int get_composition_layer_order(int p_index) override;
 
-	void register_composition_layer_provider(OpenXRViewportCompositionLayerProvider *p_composition_layer);
-	void unregister_composition_layer_provider(OpenXRViewportCompositionLayerProvider *p_composition_layer);
+	void register_viewport_composition_layer_provider(OpenXRViewportCompositionLayerProvider *p_composition_layer);
+	void unregister_viewport_composition_layer_provider(OpenXRViewportCompositionLayerProvider *p_composition_layer);
 
 	bool is_available(XrStructureType p_which);
 
@@ -75,6 +75,8 @@ class OpenXRViewportCompositionLayerProvider {
 	XrCompositionLayerBaseHeader *composition_layer = nullptr;
 	int sort_order = 1;
 	bool alpha_blend = false;
+	Dictionary extension_property_values;
+	bool extension_property_values_changed = true;
 
 	RID viewport;
 	Size2i viewport_size;
@@ -102,6 +104,8 @@ public:
 	void set_viewport(RID p_viewport, Size2i p_size);
 	RID get_viewport() const { return viewport; }
 
+	void set_extension_property_values(const Dictionary &p_property_values);
+
 	void on_pre_render();
 	XrCompositionLayerBaseHeader *get_composition_layer();
 
diff --git a/modules/openxr/extensions/openxr_extension_wrapper.h b/modules/openxr/extensions/openxr_extension_wrapper.h
index ad326472ab..ce03df0b30 100644
--- a/modules/openxr/extensions/openxr_extension_wrapper.h
+++ b/modules/openxr/extensions/openxr_extension_wrapper.h
@@ -96,6 +96,11 @@ public:
 	virtual void on_state_loss_pending() {} // `on_state_loss_pending` is called when the OpenXR session state is changed to loss pending.
 	virtual void on_state_exiting() {} // `on_state_exiting` is called when the OpenXR session state is changed to exiting.
 
+	virtual void *set_viewport_composition_layer_and_get_next_pointer(const XrCompositionLayerBaseHeader *p_layer, Dictionary p_property_values, void *p_next_pointer) { return p_next_pointer; } // Add additional data structures to composition layers created via OpenXRCompositionLayer.
+	virtual void on_viewport_composition_layer_destroyed(const XrCompositionLayerBaseHeader *p_layer) {} // `on_viewport_composition_layer_destroyed` is called when a composition layer created via OpenXRCompositionLayer is destroyed.
+	virtual void get_viewport_composition_layer_extension_properties(List<PropertyInfo> *p_property_list) {} // Get additional property definitions for OpenXRCompositionLayer.
+	virtual Dictionary get_viewport_composition_layer_extension_property_defaults() { return Dictionary(); } // Get the default values for the additional property definitions for OpenXRCompositionLayer.
+
 	// `on_event_polled` is called when there is an OpenXR event to process.
 	// Should return true if the event was handled, false otherwise.
 	virtual bool on_event_polled(const XrEventDataBuffer &event) {
diff --git a/modules/openxr/extensions/openxr_extension_wrapper_extension.cpp b/modules/openxr/extensions/openxr_extension_wrapper_extension.cpp
index 60a934e3a8..3b31e1b1f6 100644
--- a/modules/openxr/extensions/openxr_extension_wrapper_extension.cpp
+++ b/modules/openxr/extensions/openxr_extension_wrapper_extension.cpp
@@ -60,6 +60,10 @@ void OpenXRExtensionWrapperExtension::_bind_methods() {
 	GDVIRTUAL_BIND(_on_state_loss_pending);
 	GDVIRTUAL_BIND(_on_state_exiting);
 	GDVIRTUAL_BIND(_on_event_polled, "event");
+	GDVIRTUAL_BIND(_set_viewport_composition_layer_and_get_next_pointer, "layer", "property_values", "next_pointer");
+	GDVIRTUAL_BIND(_get_viewport_composition_layer_extension_properties);
+	GDVIRTUAL_BIND(_get_viewport_composition_layer_extension_property_defaults);
+	GDVIRTUAL_BIND(_on_viewport_composition_layer_destroyed, "layer");
 
 	ClassDB::bind_method(D_METHOD("get_openxr_api"), &OpenXRExtensionWrapperExtension::get_openxr_api);
 	ClassDB::bind_method(D_METHOD("register_extension_wrapper"), &OpenXRExtensionWrapperExtension::register_extension_wrapper);
@@ -240,6 +244,36 @@ bool OpenXRExtensionWrapperExtension::on_event_polled(const XrEventDataBuffer &p
 	return false;
 }
 
+void *OpenXRExtensionWrapperExtension::set_viewport_composition_layer_and_get_next_pointer(const XrCompositionLayerBaseHeader *p_layer, Dictionary p_property_values, void *p_next_pointer) {
+	uint64_t pointer = 0;
+
+	if (GDVIRTUAL_CALL(_set_viewport_composition_layer_and_get_next_pointer, GDExtensionConstPtr<void>(p_layer), p_property_values, GDExtensionPtr<void>(p_next_pointer), pointer)) {
+		return reinterpret_cast<void *>(pointer);
+	}
+
+	return p_next_pointer;
+}
+
+void OpenXRExtensionWrapperExtension::on_viewport_composition_layer_destroyed(const XrCompositionLayerBaseHeader *p_layer) {
+	GDVIRTUAL_CALL(_on_viewport_composition_layer_destroyed, GDExtensionConstPtr<void>(p_layer));
+}
+
+void OpenXRExtensionWrapperExtension::get_viewport_composition_layer_extension_properties(List<PropertyInfo> *p_property_list) {
+	TypedArray<Dictionary> properties;
+
+	if (GDVIRTUAL_CALL(_get_viewport_composition_layer_extension_properties, properties)) {
+		for (int i = 0; i < properties.size(); i++) {
+			p_property_list->push_back(PropertyInfo::from_dict(properties[i]));
+		}
+	}
+}
+
+Dictionary OpenXRExtensionWrapperExtension::get_viewport_composition_layer_extension_property_defaults() {
+	Dictionary property_defaults;
+	GDVIRTUAL_CALL(_get_viewport_composition_layer_extension_property_defaults, property_defaults);
+	return property_defaults;
+}
+
 Ref<OpenXRAPIExtension> OpenXRExtensionWrapperExtension::get_openxr_api() {
 	return openxr_api;
 }
diff --git a/modules/openxr/extensions/openxr_extension_wrapper_extension.h b/modules/openxr/extensions/openxr_extension_wrapper_extension.h
index d3b78bf617..71d2a57ff8 100644
--- a/modules/openxr/extensions/openxr_extension_wrapper_extension.h
+++ b/modules/openxr/extensions/openxr_extension_wrapper_extension.h
@@ -38,6 +38,7 @@
 #include "core/os/os.h"
 #include "core/os/thread_safe.h"
 #include "core/variant/native_ptr.h"
+#include "core/variant/typed_array.h"
 
 class OpenXRExtensionWrapperExtension : public Object, public OpenXRExtensionWrapper, public OpenXRCompositionLayerProvider {
 	GDCLASS(OpenXRExtensionWrapperExtension, Object);
@@ -59,6 +60,7 @@ public:
 	virtual void *set_session_create_and_get_next_pointer(void *p_next_pointer) override;
 	virtual void *set_swapchain_create_info_and_get_next_pointer(void *p_next_pointer) override;
 	virtual void *set_hand_joint_locations_and_get_next_pointer(int p_hand_index, void *p_next_pointer) override;
+
 	virtual int get_composition_layer_count() override;
 	virtual XrCompositionLayerBaseHeader *get_composition_layer(int p_index) override;
 	virtual int get_composition_layer_order(int p_index) override;
@@ -117,6 +119,16 @@ public:
 
 	GDVIRTUAL1R(bool, _on_event_polled, GDExtensionConstPtr<void>);
 
+	virtual void *set_viewport_composition_layer_and_get_next_pointer(const XrCompositionLayerBaseHeader *p_layer, Dictionary p_property_values, void *p_next_pointer) override;
+	virtual void on_viewport_composition_layer_destroyed(const XrCompositionLayerBaseHeader *p_layer) override;
+	virtual void get_viewport_composition_layer_extension_properties(List<PropertyInfo> *p_property_list) override;
+	virtual Dictionary get_viewport_composition_layer_extension_property_defaults() override;
+
+	GDVIRTUAL3R(uint64_t, _set_viewport_composition_layer_and_get_next_pointer, GDExtensionConstPtr<void>, Dictionary, GDExtensionPtr<void>);
+	GDVIRTUAL1(_on_viewport_composition_layer_destroyed, GDExtensionConstPtr<void>);
+	GDVIRTUAL0R(TypedArray<Dictionary>, _get_viewport_composition_layer_extension_properties);
+	GDVIRTUAL0R(Dictionary, _get_viewport_composition_layer_extension_property_defaults);
+
 	Ref<OpenXRAPIExtension> get_openxr_api();
 
 	void register_extension_wrapper();
diff --git a/modules/openxr/extensions/openxr_fb_display_refresh_rate_extension.cpp b/modules/openxr/extensions/openxr_fb_display_refresh_rate_extension.cpp
index 0ef7070531..402389144a 100644
--- a/modules/openxr/extensions/openxr_fb_display_refresh_rate_extension.cpp
+++ b/modules/openxr/extensions/openxr_fb_display_refresh_rate_extension.cpp
@@ -29,6 +29,7 @@
 /**************************************************************************/
 
 #include "openxr_fb_display_refresh_rate_extension.h"
+#include "../openxr_interface.h"
 
 OpenXRDisplayRefreshRateExtension *OpenXRDisplayRefreshRateExtension::singleton = nullptr;
 
@@ -64,6 +65,23 @@ void OpenXRDisplayRefreshRateExtension::on_instance_destroyed() {
 	display_refresh_rate_ext = false;
 }
 
+bool OpenXRDisplayRefreshRateExtension::on_event_polled(const XrEventDataBuffer &event) {
+	switch (event.type) {
+		case XR_TYPE_EVENT_DATA_DISPLAY_REFRESH_RATE_CHANGED_FB: {
+			const XrEventDataDisplayRefreshRateChangedFB *event_fb = (XrEventDataDisplayRefreshRateChangedFB *)&event;
+
+			OpenXRInterface *xr_interface = OpenXRAPI::get_singleton()->get_xr_interface();
+			if (xr_interface) {
+				xr_interface->on_refresh_rate_changes(event_fb->toDisplayRefreshRate);
+			}
+
+			return true;
+		} break;
+		default:
+			return false;
+	}
+}
+
 float OpenXRDisplayRefreshRateExtension::get_refresh_rate() const {
 	float refresh_rate = 0.0;
 
diff --git a/modules/openxr/extensions/openxr_fb_display_refresh_rate_extension.h b/modules/openxr/extensions/openxr_fb_display_refresh_rate_extension.h
index d8015fe600..1048d245a4 100644
--- a/modules/openxr/extensions/openxr_fb_display_refresh_rate_extension.h
+++ b/modules/openxr/extensions/openxr_fb_display_refresh_rate_extension.h
@@ -51,6 +51,7 @@ public:
 
 	virtual void on_instance_created(const XrInstance p_instance) override;
 	virtual void on_instance_destroyed() override;
+	virtual bool on_event_polled(const XrEventDataBuffer &event) override;
 
 	float get_refresh_rate() const;
 	void set_refresh_rate(float p_refresh_rate);
diff --git a/modules/openxr/extensions/platform/openxr_vulkan_extension.cpp b/modules/openxr/extensions/platform/openxr_vulkan_extension.cpp
index f5e7fc192c..da613f8435 100644
--- a/modules/openxr/extensions/platform/openxr_vulkan_extension.cpp
+++ b/modules/openxr/extensions/platform/openxr_vulkan_extension.cpp
@@ -229,6 +229,10 @@ void OpenXRVulkanExtension::get_usable_swapchain_formats(Vector<int64_t> &p_usab
 }
 
 void OpenXRVulkanExtension::get_usable_depth_formats(Vector<int64_t> &p_usable_swap_chains) {
+	// Note, it is very likely we do NOT support any of depth formats where we can combine our stencil support (e.g. _S8_UINT).
+	// Right now this isn't a problem but once stencil support becomes an issue, we need to check for this in the rendering engine
+	// and create a separate buffer for the stencil.
+
 	p_usable_swap_chains.push_back(VK_FORMAT_D24_UNORM_S8_UINT);
 	p_usable_swap_chains.push_back(VK_FORMAT_D32_SFLOAT_S8_UINT);
 	p_usable_swap_chains.push_back(VK_FORMAT_D32_SFLOAT);
diff --git a/modules/openxr/openxr_api.cpp b/modules/openxr/openxr_api.cpp
index 91a4839a06..1fe402341b 100644
--- a/modules/openxr/openxr_api.cpp
+++ b/modules/openxr/openxr_api.cpp
@@ -63,6 +63,198 @@
 #define OPENXR_LOADER_NAME "libopenxr_loader.so"
 #endif
 
+////////////////////////////////////
+// OpenXRAPI::OpenXRSwapChainInfo
+
+Vector<OpenXRAPI::OpenXRSwapChainInfo> OpenXRAPI::OpenXRSwapChainInfo::free_queue;
+
+bool OpenXRAPI::OpenXRSwapChainInfo::create(XrSwapchainCreateFlags p_create_flags, XrSwapchainUsageFlags p_usage_flags, int64_t p_swapchain_format, uint32_t p_width, uint32_t p_height, uint32_t p_sample_count, uint32_t p_array_size) {
+	OpenXRAPI *openxr_api = OpenXRAPI::get_singleton();
+	ERR_FAIL_NULL_V(openxr_api, false);
+
+	XrSession xr_session = openxr_api->get_session();
+	ERR_FAIL_COND_V(xr_session == XR_NULL_HANDLE, false);
+
+	OpenXRGraphicsExtensionWrapper *xr_graphics_extension = openxr_api->get_graphics_extension();
+	ERR_FAIL_NULL_V(xr_graphics_extension, false);
+
+	// We already have a swapchain?
+	ERR_FAIL_COND_V(swapchain != XR_NULL_HANDLE, false);
+
+	XrResult result;
+
+	void *next_pointer = nullptr;
+	for (OpenXRExtensionWrapper *wrapper : openxr_api->get_registered_extension_wrappers()) {
+		void *np = wrapper->set_swapchain_create_info_and_get_next_pointer(next_pointer);
+		if (np != nullptr) {
+			next_pointer = np;
+		}
+	}
+
+	XrSwapchainCreateInfo swapchain_create_info = {
+		XR_TYPE_SWAPCHAIN_CREATE_INFO, // type
+		next_pointer, // next
+		p_create_flags, // createFlags
+		p_usage_flags, // usageFlags
+		p_swapchain_format, // format
+		p_sample_count, // sampleCount
+		p_width, // width
+		p_height, // height
+		1, // faceCount
+		p_array_size, // arraySize
+		1 // mipCount
+	};
+
+	XrSwapchain new_swapchain;
+	result = openxr_api->xrCreateSwapchain(xr_session, &swapchain_create_info, &new_swapchain);
+	if (XR_FAILED(result)) {
+		print_line("OpenXR: Failed to get swapchain [", openxr_api->get_error_string(result), "]");
+		return false;
+	}
+
+	if (!xr_graphics_extension->get_swapchain_image_data(new_swapchain, p_swapchain_format, p_width, p_height, p_sample_count, p_array_size, &swapchain_graphics_data)) {
+		openxr_api->xrDestroySwapchain(new_swapchain);
+		return false;
+	}
+
+	swapchain = new_swapchain;
+
+	return true;
+}
+
+void OpenXRAPI::OpenXRSwapChainInfo::queue_free() {
+	if (image_acquired) {
+		release();
+	}
+
+	if (swapchain != XR_NULL_HANDLE) {
+		free_queue.push_back(*this);
+
+		swapchain_graphics_data = nullptr;
+		swapchain = XR_NULL_HANDLE;
+	}
+}
+
+void OpenXRAPI::OpenXRSwapChainInfo::free_queued() {
+	for (OpenXRAPI::OpenXRSwapChainInfo &swapchain_info : free_queue) {
+		swapchain_info.free();
+	}
+	free_queue.clear();
+}
+
+void OpenXRAPI::OpenXRSwapChainInfo::free() {
+	OpenXRAPI *openxr_api = OpenXRAPI::get_singleton();
+	ERR_FAIL_NULL(openxr_api);
+
+	if (image_acquired) {
+		release();
+	}
+
+	if (openxr_api->get_graphics_extension() && swapchain_graphics_data != nullptr) {
+		openxr_api->get_graphics_extension()->cleanup_swapchain_graphics_data(&swapchain_graphics_data);
+	}
+
+	if (swapchain != XR_NULL_HANDLE) {
+		openxr_api->xrDestroySwapchain(swapchain);
+		swapchain = XR_NULL_HANDLE;
+	}
+}
+
+bool OpenXRAPI::OpenXRSwapChainInfo::acquire(XrBool32 &p_should_render) {
+	ERR_FAIL_COND_V(image_acquired, true); // This was not released when it should be, error out and reuse...
+
+	OpenXRAPI *openxr_api = OpenXRAPI::get_singleton();
+	ERR_FAIL_NULL_V(openxr_api, false);
+
+	XrResult result;
+
+	if (!skip_acquire_swapchain) {
+		XrSwapchainImageAcquireInfo swapchain_image_acquire_info = {
+			XR_TYPE_SWAPCHAIN_IMAGE_ACQUIRE_INFO, // type
+			nullptr // next
+		};
+
+		result = openxr_api->xrAcquireSwapchainImage(swapchain, &swapchain_image_acquire_info, &image_index);
+		if (!XR_UNQUALIFIED_SUCCESS(result)) {
+			// Make sure end_frame knows we need to submit an empty frame
+			p_should_render = false;
+
+			if (XR_FAILED(result)) {
+				// Unexpected failure, log this!
+				print_line("OpenXR: failed to acquire swapchain image [", openxr_api->get_error_string(result), "]");
+				return false;
+			} else {
+				// In this scenario we silently fail, the XR runtime is simply not ready yet to acquire the swapchain.
+				return false;
+			}
+		}
+	}
+
+	XrSwapchainImageWaitInfo swapchain_image_wait_info = {
+		XR_TYPE_SWAPCHAIN_IMAGE_WAIT_INFO, // type
+		nullptr, // next
+		17000000 // timeout in nanoseconds
+	};
+
+	result = openxr_api->xrWaitSwapchainImage(swapchain, &swapchain_image_wait_info);
+	if (!XR_UNQUALIFIED_SUCCESS(result)) {
+		// Make sure end_frame knows we need to submit an empty frame
+		p_should_render = false;
+
+		if (XR_FAILED(result)) {
+			// Unexpected failure, log this!
+			print_line("OpenXR: failed to wait for swapchain image [", openxr_api->get_error_string(result), "]");
+			return false;
+		} else {
+			// Make sure to skip trying to acquire the swapchain image in the next frame
+			skip_acquire_swapchain = true;
+			return false;
+		}
+	} else {
+		skip_acquire_swapchain = false;
+	}
+
+	image_acquired = true;
+	return true;
+}
+
+bool OpenXRAPI::OpenXRSwapChainInfo::release() {
+	if (!image_acquired) {
+		// Already released or never acquired.
+		return true;
+	}
+
+	image_acquired = false; // Regardless if we succeed or not, consider this released.
+
+	OpenXRAPI *openxr_api = OpenXRAPI::get_singleton();
+	ERR_FAIL_NULL_V(openxr_api, false);
+
+	XrSwapchainImageReleaseInfo swapchain_image_release_info = {
+		XR_TYPE_SWAPCHAIN_IMAGE_RELEASE_INFO, // type
+		nullptr // next
+	};
+	XrResult result = openxr_api->xrReleaseSwapchainImage(swapchain, &swapchain_image_release_info);
+	if (XR_FAILED(result)) {
+		print_line("OpenXR: failed to release swapchain image! [", openxr_api->get_error_string(result), "]");
+		return false;
+	}
+
+	return true;
+}
+
+RID OpenXRAPI::OpenXRSwapChainInfo::get_image() {
+	OpenXRAPI *openxr_api = OpenXRAPI::get_singleton();
+
+	if (image_acquired && openxr_api && openxr_api->get_graphics_extension()) {
+		return OpenXRAPI::get_singleton()->get_graphics_extension()->get_texture(swapchain_graphics_data, image_index);
+	} else {
+		return RID();
+	}
+}
+
+////////////////////////////////////
+// OpenXRAPI
+
 OpenXRAPI *OpenXRAPI::singleton = nullptr;
 Vector<OpenXRExtensionWrapper *> OpenXRAPI::registered_extension_wrappers;
 
@@ -568,6 +760,21 @@ bool OpenXRAPI::load_supported_view_configuration_views(XrViewConfigurationType
 		print_verbose(String(" - recommended render sample count: ") + itos(view_configuration_views[i].recommendedSwapchainSampleCount));
 	}
 
+	// Allocate buffers we'll be populating with view information.
+	views = (XrView *)memalloc(sizeof(XrView) * view_count);
+	ERR_FAIL_NULL_V_MSG(views, false, "OpenXR Couldn't allocate memory for views");
+	memset(views, 0, sizeof(XrView) * view_count);
+
+	projection_views = (XrCompositionLayerProjectionView *)memalloc(sizeof(XrCompositionLayerProjectionView) * view_count);
+	ERR_FAIL_NULL_V_MSG(projection_views, false, "OpenXR Couldn't allocate memory for projection views");
+	memset(projection_views, 0, sizeof(XrCompositionLayerProjectionView) * view_count);
+
+	if (submit_depth_buffer && OpenXRCompositionLayerDepthExtension::get_singleton()->is_available()) {
+		depth_views = (XrCompositionLayerDepthInfoKHR *)memalloc(sizeof(XrCompositionLayerDepthInfoKHR) * view_count);
+		ERR_FAIL_NULL_V_MSG(depth_views, false, "OpenXR Couldn't allocate memory for depth views");
+		memset(depth_views, 0, sizeof(XrCompositionLayerDepthInfoKHR) * view_count);
+	}
+
 	return true;
 }
 
@@ -878,31 +1085,10 @@ bool OpenXRAPI::is_swapchain_format_supported(int64_t p_swapchain_format) {
 	return false;
 }
 
-bool OpenXRAPI::create_swapchains() {
+bool OpenXRAPI::obtain_swapchain_formats() {
 	ERR_FAIL_NULL_V(graphics_extension, false);
 	ERR_FAIL_COND_V(session == XR_NULL_HANDLE, false);
 
-	/*
-		TODO: We need to improve on this, for now we're taking our old approach of creating our main swapchains and substituting
-		those for the ones Godot normally creates.
-		This however means we can only use swapchains for our main XR view.
-
-		It would have been nicer if we could override the swapchain creation in Godot with ours but we have a timing issue here.
-		We can't create XR swapchains until after our XR session is fully instantiated, yet Godot creates its swapchain much earlier.
-
-		Also Godot only creates a swapchain for the main output.
-		OpenXR will require us to create swapchains as the render target for additional viewports if we want to use the layer system
-		to optimize text rendering and background rendering as OpenXR may choose to reuse the results for reprojection while we're
-		already rendering the next frame.
-
-		Finally an area we need to expand upon is that Foveated rendering is only enabled for the swap chain we create,
-		as we render 3D content into internal buffers that are copied into the swapchain, we do now have (basic) VRS support
-	*/
-
-	Size2 recommended_size = get_recommended_target_size();
-	uint32_t sample_count = 1;
-
-	// We start with our color swapchain...
 	{
 		// Build a vector with swapchain formats we want to use, from best fit to worst
 		Vector<int64_t> usable_swapchain_formats;
@@ -923,23 +1109,9 @@ bool OpenXRAPI::create_swapchains() {
 		} else {
 			print_verbose(String("Using color swap chain format:") + get_swapchain_format_name(color_swapchain_format));
 		}
-
-		if (!create_swapchain(0, XR_SWAPCHAIN_USAGE_SAMPLED_BIT | XR_SWAPCHAIN_USAGE_COLOR_ATTACHMENT_BIT | XR_SWAPCHAIN_USAGE_MUTABLE_FORMAT_BIT, color_swapchain_format, recommended_size.width, recommended_size.height, sample_count, view_count, swapchains[OPENXR_SWAPCHAIN_COLOR].swapchain, &swapchains[OPENXR_SWAPCHAIN_COLOR].swapchain_graphics_data)) {
-			return false;
-		}
 	}
 
-	views = (XrView *)memalloc(sizeof(XrView) * view_count);
-	ERR_FAIL_NULL_V_MSG(views, false, "OpenXR Couldn't allocate memory for views");
-
-	projection_views = (XrCompositionLayerProjectionView *)memalloc(sizeof(XrCompositionLayerProjectionView) * view_count);
-	ERR_FAIL_NULL_V_MSG(projection_views, false, "OpenXR Couldn't allocate memory for projection views");
-
-	// We create our depth swapchain if:
-	// - we've enabled submitting depth buffer
-	// - we support our depth layer extension
-	// - we have our spacewarp extension (not yet implemented)
-	if (submit_depth_buffer && OpenXRCompositionLayerDepthExtension::get_singleton()->is_available()) {
+	{
 		// Build a vector with swapchain formats we want to use, from best fit to worst
 		Vector<int64_t> usable_swapchain_formats;
 		depth_swapchain_format = 0;
@@ -954,18 +1126,51 @@ bool OpenXRAPI::create_swapchains() {
 		}
 
 		if (depth_swapchain_format == 0) {
-			print_line("Couldn't find usable depth swap chain format, depth buffer will not be submitted.");
+			WARN_PRINT_ONCE("Couldn't find usable depth swap chain format, depth buffer will not be submitted if requested.");
 		} else {
 			print_verbose(String("Using depth swap chain format:") + get_swapchain_format_name(depth_swapchain_format));
+		}
+	}
 
-			// Note, if VK_FORMAT_D32_SFLOAT is used here but we're using the forward+ renderer, we should probably output a warning.
+	return true;
+}
 
-			if (!create_swapchain(0, XR_SWAPCHAIN_USAGE_SAMPLED_BIT | XR_SWAPCHAIN_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, depth_swapchain_format, recommended_size.width, recommended_size.height, sample_count, view_count, swapchains[OPENXR_SWAPCHAIN_DEPTH].swapchain, &swapchains[OPENXR_SWAPCHAIN_DEPTH].swapchain_graphics_data)) {
-				return false;
-			}
+bool OpenXRAPI::create_main_swapchains(Size2i p_size) {
+	ERR_FAIL_NULL_V(graphics_extension, false);
+	ERR_FAIL_COND_V(session == XR_NULL_HANDLE, false);
 
-			depth_views = (XrCompositionLayerDepthInfoKHR *)memalloc(sizeof(XrCompositionLayerDepthInfoKHR) * view_count);
-			ERR_FAIL_NULL_V_MSG(depth_views, false, "OpenXR Couldn't allocate memory for depth views");
+	/*
+		TODO: We need to improve on this, for now we're taking our old approach of creating our main swapchains and substituting
+		those for the ones Godot normally creates.
+		This however means we can only use swapchains for our main XR view.
+
+		It would have been nicer if we could override the swapchain creation in Godot with ours but we have a timing issue here.
+		We can't create XR swapchains until after our XR session is fully instantiated, yet Godot creates its swapchain much earlier.
+
+		We only creates a swapchain for the main output here.
+		Additional swapchains may be created through our composition layer extension.
+
+		Finally an area we need to expand upon is that Foveated rendering is only enabled for the swap chain we create,
+		as we render 3D content into internal buffers that are copied into the swapchain, we do now have (basic) VRS support
+	*/
+
+	main_swapchain_size = p_size;
+	uint32_t sample_count = 1;
+
+	// We start with our color swapchain...
+	if (color_swapchain_format != 0) {
+		if (!main_swapchains[OPENXR_SWAPCHAIN_COLOR].create(0, XR_SWAPCHAIN_USAGE_SAMPLED_BIT | XR_SWAPCHAIN_USAGE_COLOR_ATTACHMENT_BIT | XR_SWAPCHAIN_USAGE_MUTABLE_FORMAT_BIT, color_swapchain_format, main_swapchain_size.width, main_swapchain_size.height, sample_count, view_count)) {
+			return false;
+		}
+	}
+
+	// We create our depth swapchain if:
+	// - we've enabled submitting depth buffer
+	// - we support our depth layer extension
+	// - we have our spacewarp extension (not yet implemented)
+	if (depth_swapchain_format != 0 && submit_depth_buffer && OpenXRCompositionLayerDepthExtension::get_singleton()->is_available()) {
+		if (!main_swapchains[OPENXR_SWAPCHAIN_DEPTH].create(0, XR_SWAPCHAIN_USAGE_SAMPLED_BIT | XR_SWAPCHAIN_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, depth_swapchain_format, main_swapchain_size.width, main_swapchain_size.height, sample_count, view_count)) {
+			return false;
 		}
 	}
 
@@ -981,28 +1186,30 @@ bool OpenXRAPI::create_swapchains() {
 
 		projection_views[i].type = XR_TYPE_COMPOSITION_LAYER_PROJECTION_VIEW;
 		projection_views[i].next = nullptr;
-		projection_views[i].subImage.swapchain = swapchains[OPENXR_SWAPCHAIN_COLOR].swapchain;
+		projection_views[i].subImage.swapchain = main_swapchains[OPENXR_SWAPCHAIN_COLOR].get_swapchain();
 		projection_views[i].subImage.imageArrayIndex = i;
 		projection_views[i].subImage.imageRect.offset.x = 0;
 		projection_views[i].subImage.imageRect.offset.y = 0;
-		projection_views[i].subImage.imageRect.extent.width = recommended_size.width;
-		projection_views[i].subImage.imageRect.extent.height = recommended_size.height;
+		projection_views[i].subImage.imageRect.extent.width = main_swapchain_size.width;
+		projection_views[i].subImage.imageRect.extent.height = main_swapchain_size.height;
 
 		if (submit_depth_buffer && OpenXRCompositionLayerDepthExtension::get_singleton()->is_available() && depth_views) {
 			projection_views[i].next = &depth_views[i];
 
 			depth_views[i].type = XR_TYPE_COMPOSITION_LAYER_DEPTH_INFO_KHR;
 			depth_views[i].next = nullptr;
-			depth_views[i].subImage.swapchain = swapchains[OPENXR_SWAPCHAIN_DEPTH].swapchain;
+			depth_views[i].subImage.swapchain = main_swapchains[OPENXR_SWAPCHAIN_DEPTH].get_swapchain();
 			depth_views[i].subImage.imageArrayIndex = i;
 			depth_views[i].subImage.imageRect.offset.x = 0;
 			depth_views[i].subImage.imageRect.offset.y = 0;
-			depth_views[i].subImage.imageRect.extent.width = recommended_size.width;
-			depth_views[i].subImage.imageRect.extent.height = recommended_size.height;
+			depth_views[i].subImage.imageRect.extent.width = main_swapchain_size.width;
+			depth_views[i].subImage.imageRect.extent.height = main_swapchain_size.height;
+			// OpenXR spec says that: minDepth < maxDepth.
 			depth_views[i].minDepth = 0.0;
 			depth_views[i].maxDepth = 1.0;
-			depth_views[i].nearZ = 0.01; // Near and far Z will be set to the correct values in fill_projection_matrix
-			depth_views[i].farZ = 100.0;
+			// But we can reverse near and far for reverse-Z.
+			depth_views[i].nearZ = 100.0; // Near and far Z will be set to the correct values in fill_projection_matrix
+			depth_views[i].farZ = 0.01;
 		}
 	};
 
@@ -1029,9 +1236,8 @@ void OpenXRAPI::destroy_session() {
 		depth_views = nullptr;
 	}
 
-	for (int i = 0; i < OPENXR_SWAPCHAIN_MAX; i++) {
-		free_swapchain(swapchains[i]);
-	}
+	free_main_swapchains();
+	OpenXRSwapChainInfo::free_queued();
 
 	if (supported_swapchain_formats != nullptr) {
 		memfree(supported_swapchain_formats);
@@ -1064,51 +1270,6 @@ void OpenXRAPI::destroy_session() {
 	}
 }
 
-bool OpenXRAPI::create_swapchain(XrSwapchainCreateFlags p_create_flags, XrSwapchainUsageFlags p_usage_flags, int64_t p_swapchain_format, uint32_t p_width, uint32_t p_height, uint32_t p_sample_count, uint32_t p_array_size, XrSwapchain &r_swapchain, void **r_swapchain_graphics_data) {
-	ERR_FAIL_COND_V(session == XR_NULL_HANDLE, false);
-	ERR_FAIL_NULL_V(graphics_extension, false);
-
-	XrResult result;
-
-	void *next_pointer = nullptr;
-	for (OpenXRExtensionWrapper *wrapper : registered_extension_wrappers) {
-		void *np = wrapper->set_swapchain_create_info_and_get_next_pointer(next_pointer);
-		if (np != nullptr) {
-			next_pointer = np;
-		}
-	}
-
-	XrSwapchainCreateInfo swapchain_create_info = {
-		XR_TYPE_SWAPCHAIN_CREATE_INFO, // type
-		next_pointer, // next
-		p_create_flags, // createFlags
-		p_usage_flags, // usageFlags
-		p_swapchain_format, // format
-		p_sample_count, // sampleCount
-		p_width, // width
-		p_height, // height
-		1, // faceCount
-		p_array_size, // arraySize
-		1 // mipCount
-	};
-
-	XrSwapchain new_swapchain;
-	result = xrCreateSwapchain(session, &swapchain_create_info, &new_swapchain);
-	if (XR_FAILED(result)) {
-		print_line("OpenXR: Failed to get swapchain [", get_error_string(result), "]");
-		return false;
-	}
-
-	if (!graphics_extension->get_swapchain_image_data(new_swapchain, p_swapchain_format, p_width, p_height, p_sample_count, p_array_size, r_swapchain_graphics_data)) {
-		xrDestroySwapchain(new_swapchain);
-		return false;
-	}
-
-	r_swapchain = new_swapchain;
-
-	return true;
-}
-
 bool OpenXRAPI::on_state_idle() {
 	print_verbose("On state idle");
 
@@ -1135,17 +1296,6 @@ bool OpenXRAPI::on_state_ready() {
 		return false;
 	}
 
-	// This is when we create our swapchain, this can be a "long" time after Godot finishes, we can deal with this for now
-	// but once we want to provide Viewports for additional layers where OpenXR requires us to create further swapchains,
-	// we'll be creating those viewport WAY before we reach this point.
-	// We may need to implement a wait in our init in main.cpp polling our events until the session is ready.
-	// That will be very very ugly
-	// The other possibility is to create a separate OpenXRViewport type specifically for this goal as part of our OpenXR module
-
-	if (!create_swapchains()) {
-		return false;
-	}
-
 	// we're running
 	running = true;
 
@@ -1157,8 +1307,6 @@ bool OpenXRAPI::on_state_ready() {
 		xr_interface->on_state_ready();
 	}
 
-	// TODO Tell android
-
 	return true;
 }
 
@@ -1492,6 +1640,11 @@ bool OpenXRAPI::initialize_session() {
 		return false;
 	}
 
+	if (!obtain_swapchain_formats()) {
+		destroy_session();
+		return false;
+	}
+
 	return true;
 }
 
@@ -1651,8 +1804,9 @@ bool OpenXRAPI::get_view_projection(uint32_t p_view, double p_z_near, double p_z
 	// if we're using depth views, make sure we update our near and far there...
 	if (depth_views != nullptr) {
 		for (uint32_t i = 0; i < view_count; i++) {
-			depth_views[i].nearZ = p_z_near;
-			depth_views[i].farZ = p_z_far;
+			// As we are using reverse-Z these need to be flipped.
+			depth_views[i].nearZ = p_z_far;
+			depth_views[i].farZ = p_z_near;
 		}
 	}
 
@@ -1798,103 +1952,10 @@ bool OpenXRAPI::process() {
 	return true;
 }
 
-void OpenXRAPI::free_swapchain(OpenXRSwapChainInfo &p_swapchain) {
-	if (p_swapchain.image_acquired) {
-		release_image(p_swapchain);
-	}
-
-	if (graphics_extension && p_swapchain.swapchain_graphics_data != nullptr) {
-		graphics_extension->cleanup_swapchain_graphics_data(&p_swapchain.swapchain_graphics_data);
-	}
-
-	if (p_swapchain.swapchain != XR_NULL_HANDLE) {
-		xrDestroySwapchain(p_swapchain.swapchain);
-		p_swapchain.swapchain = XR_NULL_HANDLE;
-	}
-}
-
-bool OpenXRAPI::acquire_image(OpenXRSwapChainInfo &p_swapchain) {
-	ERR_FAIL_COND_V(p_swapchain.image_acquired, true); // This was not released when it should be, error out and reuse...
-
-	XrResult result;
-
-	if (!p_swapchain.skip_acquire_swapchain) {
-		XrSwapchainImageAcquireInfo swapchain_image_acquire_info = {
-			XR_TYPE_SWAPCHAIN_IMAGE_ACQUIRE_INFO, // type
-			nullptr // next
-		};
-
-		result = xrAcquireSwapchainImage(p_swapchain.swapchain, &swapchain_image_acquire_info, &p_swapchain.image_index);
-		if (!XR_UNQUALIFIED_SUCCESS(result)) {
-			// Make sure end_frame knows we need to submit an empty frame
-			frame_state.shouldRender = false;
-
-			if (XR_FAILED(result)) {
-				// Unexpected failure, log this!
-				print_line("OpenXR: failed to acquire swapchain image [", get_error_string(result), "]");
-				return false;
-			} else {
-				// In this scenario we silently fail, the XR runtime is simply not ready yet to acquire the swapchain.
-				return false;
-			}
-		}
-	}
-
-	XrSwapchainImageWaitInfo swapchain_image_wait_info = {
-		XR_TYPE_SWAPCHAIN_IMAGE_WAIT_INFO, // type
-		nullptr, // next
-		17000000 // timeout in nanoseconds
-	};
-
-	result = xrWaitSwapchainImage(p_swapchain.swapchain, &swapchain_image_wait_info);
-	if (!XR_UNQUALIFIED_SUCCESS(result)) {
-		// Make sure end_frame knows we need to submit an empty frame
-		frame_state.shouldRender = false;
-
-		if (XR_FAILED(result)) {
-			// Unexpected failure, log this!
-			print_line("OpenXR: failed to wait for swapchain image [", get_error_string(result), "]");
-			return false;
-		} else {
-			// Make sure to skip trying to acquire the swapchain image in the next frame
-			p_swapchain.skip_acquire_swapchain = true;
-			return false;
-		}
-	} else {
-		p_swapchain.skip_acquire_swapchain = false;
-	}
-
-	p_swapchain.image_acquired = true;
-	return true;
-}
-
-RID OpenXRAPI::get_image(OpenXRSwapChainInfo &p_swapchain) {
-	if (p_swapchain.image_acquired) {
-		return graphics_extension->get_texture(p_swapchain.swapchain_graphics_data, p_swapchain.image_index);
-	} else {
-		return RID();
-	}
-}
-
-bool OpenXRAPI::release_image(OpenXRSwapChainInfo &p_swapchain) {
-	if (!p_swapchain.image_acquired) {
-		// Already released or never acquired.
-		return true;
-	}
-
-	p_swapchain.image_acquired = false; // Regardless if we succeed or not, consider this released.
-
-	XrSwapchainImageReleaseInfo swapchain_image_release_info = {
-		XR_TYPE_SWAPCHAIN_IMAGE_RELEASE_INFO, // type
-		nullptr // next
-	};
-	XrResult result = xrReleaseSwapchainImage(p_swapchain.swapchain, &swapchain_image_release_info);
-	if (XR_FAILED(result)) {
-		print_line("OpenXR: failed to release swapchain image! [", get_error_string(result), "]");
-		return false;
+void OpenXRAPI::free_main_swapchains() {
+	for (int i = 0; i < OPENXR_SWAPCHAIN_MAX; i++) {
+		main_swapchains[i].queue_free();
 	}
-
-	return true;
 }
 
 void OpenXRAPI::pre_render() {
@@ -1904,6 +1965,18 @@ void OpenXRAPI::pre_render() {
 		return;
 	}
 
+	// Process any swapchains that were queued to be freed
+	OpenXRSwapChainInfo::free_queued();
+
+	Size2i swapchain_size = get_recommended_target_size();
+	if (swapchain_size != main_swapchain_size) {
+		// Out with the old.
+		free_main_swapchains();
+
+		// In with the new.
+		create_main_swapchains(swapchain_size);
+	}
+
 	// Waitframe does 2 important things in our process:
 	// 1) It provides us with predictive timing, telling us when OpenXR expects to display the frame we're about to commit
 	// 2) It will use the previous timing to pause our thread so that rendering starts as close to displaying as possible
@@ -1996,9 +2069,15 @@ void OpenXRAPI::pre_render() {
 		print_line("OpenXR: failed to being frame [", get_error_string(result), "]");
 		return;
 	}
+
+	// Reset this, we haven't found a viewport for output yet
+	has_xr_viewport = false;
 }
 
 bool OpenXRAPI::pre_draw_viewport(RID p_render_target) {
+	// We found an XR viewport!
+	has_xr_viewport = true;
+
 	if (!can_render()) {
 		return false;
 	}
@@ -2007,8 +2086,8 @@ bool OpenXRAPI::pre_draw_viewport(RID p_render_target) {
 
 	// Acquire our images
 	for (int i = 0; i < OPENXR_SWAPCHAIN_MAX; i++) {
-		if (!swapchains[i].image_acquired && swapchains[i].swapchain != XR_NULL_HANDLE) {
-			if (!acquire_image(swapchains[i])) {
+		if (!main_swapchains[i].is_image_acquired() && main_swapchains[i].get_swapchain() != XR_NULL_HANDLE) {
+			if (!main_swapchains[i].acquire(frame_state.shouldRender)) {
 				return false;
 			}
 		}
@@ -2022,17 +2101,17 @@ bool OpenXRAPI::pre_draw_viewport(RID p_render_target) {
 }
 
 XrSwapchain OpenXRAPI::get_color_swapchain() {
-	return swapchains[OPENXR_SWAPCHAIN_COLOR].swapchain;
+	return main_swapchains[OPENXR_SWAPCHAIN_COLOR].get_swapchain();
 }
 
 RID OpenXRAPI::get_color_texture() {
-	return get_image(swapchains[OPENXR_SWAPCHAIN_COLOR]);
+	return main_swapchains[OPENXR_SWAPCHAIN_COLOR].get_image();
 }
 
 RID OpenXRAPI::get_depth_texture() {
 	// Note, image will not be acquired if we didn't have a suitable swap chain format.
 	if (submit_depth_buffer) {
-		return get_image(swapchains[OPENXR_SWAPCHAIN_DEPTH]);
+		return main_swapchains[OPENXR_SWAPCHAIN_DEPTH].get_image();
 	} else {
 		return RID();
 	}
@@ -2057,15 +2136,19 @@ void OpenXRAPI::end_frame() {
 		return;
 	}
 
-	if (frame_state.shouldRender && view_pose_valid && !swapchains[OPENXR_SWAPCHAIN_COLOR].image_acquired) {
-		print_line("OpenXR: No viewport was marked with use_xr, there is no rendered output!");
+	if (frame_state.shouldRender && view_pose_valid) {
+		if (!has_xr_viewport) {
+			print_line("OpenXR: No viewport was marked with use_xr, there is no rendered output!");
+		} else if (!main_swapchains[OPENXR_SWAPCHAIN_COLOR].is_image_acquired()) {
+			print_line("OpenXR: No swapchain could be acquired to render to!");
+		}
 	}
 
 	// must have:
 	// - shouldRender set to true
 	// - a valid view pose for projection_views[eye].pose to submit layer
 	// - an image to render
-	if (!frame_state.shouldRender || !view_pose_valid || !swapchains[OPENXR_SWAPCHAIN_COLOR].image_acquired) {
+	if (!frame_state.shouldRender || !view_pose_valid || !main_swapchains[OPENXR_SWAPCHAIN_COLOR].is_image_acquired()) {
 		// submit 0 layers when we shouldn't render
 		XrFrameEndInfo frame_end_info = {
 			XR_TYPE_FRAME_END_INFO, // type
@@ -2087,8 +2170,8 @@ void OpenXRAPI::end_frame() {
 
 	// release our swapchain image if we acquired it
 	for (int i = 0; i < OPENXR_SWAPCHAIN_MAX; i++) {
-		if (swapchains[i].image_acquired) {
-			release_image(swapchains[i]);
+		if (main_swapchains[i].is_image_acquired()) {
+			main_swapchains[i].release();
 		}
 	}
 
@@ -2332,7 +2415,7 @@ OpenXRAPI::OpenXRAPI() {
 		submit_depth_buffer = GLOBAL_GET("xr/openxr/submit_depth_buffer");
 	}
 
-	// reset a few things that can't be done in our class definition
+	// Reset a few things that can't be done in our class definition.
 	frame_state.predictedDisplayTime = 0;
 	frame_state.predictedDisplayPeriod = 0;
 }
diff --git a/modules/openxr/openxr_api.h b/modules/openxr/openxr_api.h
index 615643591f..e835366200 100644
--- a/modules/openxr/openxr_api.h
+++ b/modules/openxr/openxr_api.h
@@ -58,12 +58,28 @@ class OpenXRInterface;
 
 class OpenXRAPI {
 public:
-	struct OpenXRSwapChainInfo {
+	class OpenXRSwapChainInfo {
+	private:
 		XrSwapchain swapchain = XR_NULL_HANDLE;
 		void *swapchain_graphics_data = nullptr;
 		uint32_t image_index = 0;
 		bool image_acquired = false;
 		bool skip_acquire_swapchain = false;
+
+		static Vector<OpenXRSwapChainInfo> free_queue;
+
+	public:
+		_FORCE_INLINE_ XrSwapchain get_swapchain() const { return swapchain; }
+		_FORCE_INLINE_ bool is_image_acquired() const { return image_acquired; }
+
+		bool create(XrSwapchainCreateFlags p_create_flags, XrSwapchainUsageFlags p_usage_flags, int64_t p_swapchain_format, uint32_t p_width, uint32_t p_height, uint32_t p_sample_count, uint32_t p_array_size);
+		void queue_free();
+		static void free_queued();
+		void free();
+
+		bool acquire(XrBool32 &p_should_render);
+		bool release();
+		RID get_image();
 	};
 
 private:
@@ -148,12 +164,14 @@ private:
 
 	int64_t color_swapchain_format = 0;
 	int64_t depth_swapchain_format = 0;
-	OpenXRSwapChainInfo swapchains[OPENXR_SWAPCHAIN_MAX];
+	Size2i main_swapchain_size = { 0, 0 };
+	OpenXRSwapChainInfo main_swapchains[OPENXR_SWAPCHAIN_MAX];
 
 	XrSpace play_space = XR_NULL_HANDLE;
 	XrSpace view_space = XR_NULL_HANDLE;
 	bool view_pose_valid = false;
 	XRPose::TrackingConfidence head_pose_confidence = XRPose::XR_TRACKING_CONFIDENCE_NONE;
+	bool has_xr_viewport = false;
 
 	bool emulating_local_floor = false;
 	bool should_reset_emulated_floor_height = false;
@@ -241,7 +259,9 @@ private:
 	bool setup_view_space();
 	bool load_supported_swapchain_formats();
 	bool is_swapchain_format_supported(int64_t p_swapchain_format);
-	bool create_swapchains();
+	bool obtain_swapchain_formats();
+	bool create_main_swapchains(Size2i p_size);
+	void free_main_swapchains();
 	void destroy_session();
 
 	// action map
@@ -312,6 +332,7 @@ public:
 	XrInstance get_instance() const { return instance; };
 	XrSystemId get_system_id() const { return system_id; };
 	XrSession get_session() const { return session; };
+	OpenXRGraphicsExtensionWrapper *get_graphics_extension() const { return graphics_extension; };
 	String get_runtime_name() const { return runtime_name; };
 	String get_runtime_version() const { return runtime_version; };
 
@@ -336,6 +357,7 @@ public:
 	String get_error_string(XrResult result) const;
 	String get_swapchain_format_name(int64_t p_swapchain_format) const;
 
+	OpenXRInterface *get_xr_interface() const { return xr_interface; }
 	void set_xr_interface(OpenXRInterface *p_xr_interface);
 	static void register_extension_wrapper(OpenXRExtensionWrapper *p_extension_wrapper);
 	static void unregister_extension_wrapper(OpenXRExtensionWrapper *p_extension_wrapper);
@@ -405,11 +427,7 @@ public:
 
 	// swapchains
 	int64_t get_color_swapchain_format() const { return color_swapchain_format; }
-	bool create_swapchain(XrSwapchainCreateFlags p_create_flags, XrSwapchainUsageFlags p_usage_flags, int64_t p_swapchain_format, uint32_t p_width, uint32_t p_height, uint32_t p_sample_count, uint32_t p_array_size, XrSwapchain &r_swapchain, void **r_swapchain_graphics_data);
-	void free_swapchain(OpenXRSwapChainInfo &p_swapchain);
-	bool acquire_image(OpenXRSwapChainInfo &p_swapchain);
-	RID get_image(OpenXRSwapChainInfo &p_swapchain);
-	bool release_image(OpenXRSwapChainInfo &p_swapchain);
+	int64_t get_depth_swapchain_format() const { return depth_swapchain_format; }
 
 	// action map
 	String get_default_action_map_resource_name();
diff --git a/modules/openxr/openxr_interface.cpp b/modules/openxr/openxr_interface.cpp
index 956e5ed3f3..7eb9a6ebe1 100644
--- a/modules/openxr/openxr_interface.cpp
+++ b/modules/openxr/openxr_interface.cpp
@@ -43,6 +43,7 @@ void OpenXRInterface::_bind_methods() {
 	ADD_SIGNAL(MethodInfo("session_focussed"));
 	ADD_SIGNAL(MethodInfo("session_visible"));
 	ADD_SIGNAL(MethodInfo("pose_recentered"));
+	ADD_SIGNAL(MethodInfo("refresh_rate_changed", PropertyInfo(Variant::FLOAT, "refresh_rate")));
 
 	// Display refresh rate
 	ClassDB::bind_method(D_METHOD("get_display_refresh_rate"), &OpenXRInterface::get_display_refresh_rate);
@@ -1258,6 +1259,10 @@ void OpenXRInterface::on_pose_recentered() {
 	emit_signal(SNAME("pose_recentered"));
 }
 
+void OpenXRInterface::on_refresh_rate_changes(float p_new_rate) {
+	emit_signal(SNAME("refresh_rate_changed"), p_new_rate);
+}
+
 /** Hand tracking. */
 void OpenXRInterface::set_motion_range(const Hand p_hand, const HandMotionRange p_motion_range) {
 	ERR_FAIL_INDEX(p_hand, HAND_MAX);
diff --git a/modules/openxr/openxr_interface.h b/modules/openxr/openxr_interface.h
index aee9751d6b..737f22d642 100644
--- a/modules/openxr/openxr_interface.h
+++ b/modules/openxr/openxr_interface.h
@@ -174,6 +174,7 @@ public:
 	void on_state_focused();
 	void on_state_stopping();
 	void on_pose_recentered();
+	void on_refresh_rate_changes(float p_new_rate);
 	void tracker_profile_changed(RID p_tracker, RID p_interaction_profile);
 
 	/** Hand tracking. */
diff --git a/modules/openxr/scene/openxr_composition_layer.cpp b/modules/openxr/scene/openxr_composition_layer.cpp
index 120914485f..ce883b79b3 100644
--- a/modules/openxr/scene/openxr_composition_layer.cpp
+++ b/modules/openxr/scene/openxr_composition_layer.cpp
@@ -88,6 +88,8 @@ void OpenXRCompositionLayer::_bind_methods() {
 
 	ClassDB::bind_method(D_METHOD("is_natively_supported"), &OpenXRCompositionLayer::is_natively_supported);
 
+	ClassDB::bind_method(D_METHOD("intersects_ray", "origin", "direction"), &OpenXRCompositionLayer::intersects_ray);
+
 	ADD_PROPERTY(PropertyInfo(Variant::OBJECT, "layer_viewport", PROPERTY_HINT_NODE_TYPE, "SubViewport"), "set_layer_viewport", "get_layer_viewport");
 	ADD_PROPERTY(PropertyInfo(Variant::INT, "sort_order", PROPERTY_HINT_NONE, ""), "set_sort_order", "get_sort_order");
 	ADD_PROPERTY(PropertyInfo(Variant::BOOL, "alpha_blend", PROPERTY_HINT_NONE, ""), "set_alpha_blend", "get_alpha_blend");
@@ -199,6 +201,10 @@ bool OpenXRCompositionLayer::is_natively_supported() const {
 	return false;
 }
 
+Vector2 OpenXRCompositionLayer::intersects_ray(const Vector3 &p_origin, const Vector3 &p_direction) const {
+	return Vector2(-1.0, -1.0);
+}
+
 void OpenXRCompositionLayer::_reset_fallback_material() {
 	ERR_FAIL_NULL(fallback);
 
@@ -236,6 +242,14 @@ void OpenXRCompositionLayer::_reset_fallback_material() {
 
 void OpenXRCompositionLayer::_notification(int p_what) {
 	switch (p_what) {
+		case NOTIFICATION_POSTINITIALIZE: {
+			if (openxr_layer_provider) {
+				for (OpenXRExtensionWrapper *extension : OpenXRAPI::get_registered_extension_wrappers()) {
+					extension_property_values.merge(extension->get_viewport_composition_layer_extension_property_defaults());
+				}
+				openxr_layer_provider->set_extension_property_values(extension_property_values);
+			}
+		} break;
 		case NOTIFICATION_INTERNAL_PROCESS: {
 			if (fallback) {
 				if (should_update_fallback_mesh) {
@@ -260,7 +274,7 @@ void OpenXRCompositionLayer::_notification(int p_what) {
 		} break;
 		case NOTIFICATION_ENTER_TREE: {
 			if (composition_layer_extension) {
-				composition_layer_extension->register_composition_layer_provider(openxr_layer_provider);
+				composition_layer_extension->register_viewport_composition_layer_provider(openxr_layer_provider);
 			}
 
 			if (!fallback && layer_viewport && openxr_api && openxr_api->is_running() && is_visible()) {
@@ -269,7 +283,7 @@ void OpenXRCompositionLayer::_notification(int p_what) {
 		} break;
 		case NOTIFICATION_EXIT_TREE: {
 			if (composition_layer_extension) {
-				composition_layer_extension->unregister_composition_layer_provider(openxr_layer_provider);
+				composition_layer_extension->unregister_viewport_composition_layer_provider(openxr_layer_provider);
 			}
 
 			// When a node is removed in the editor, we need to clear the layer viewport, because otherwise
@@ -285,6 +299,40 @@ void OpenXRCompositionLayer::_notification(int p_what) {
 	}
 }
 
+void OpenXRCompositionLayer::_get_property_list(List<PropertyInfo> *p_property_list) const {
+	List<PropertyInfo> extension_properties;
+	for (OpenXRExtensionWrapper *extension : OpenXRAPI::get_registered_extension_wrappers()) {
+		extension->get_viewport_composition_layer_extension_properties(&extension_properties);
+	}
+
+	for (const PropertyInfo &pinfo : extension_properties) {
+		StringName prop_name = pinfo.name;
+		if (!String(prop_name).contains("/")) {
+			WARN_PRINT_ONCE(vformat("Discarding OpenXRCompositionLayer property name '%s' from extension because it doesn't contain a '/'."));
+			continue;
+		}
+		p_property_list->push_back(pinfo);
+	}
+}
+
+bool OpenXRCompositionLayer::_get(const StringName &p_property, Variant &r_value) const {
+	if (extension_property_values.has(p_property)) {
+		r_value = extension_property_values[p_property];
+	}
+
+	return true;
+}
+
+bool OpenXRCompositionLayer::_set(const StringName &p_property, const Variant &p_value) {
+	extension_property_values[p_property] = p_value;
+
+	if (openxr_layer_provider) {
+		openxr_layer_provider->set_extension_property_values(extension_property_values);
+	}
+
+	return true;
+}
+
 PackedStringArray OpenXRCompositionLayer::get_configuration_warnings() const {
 	PackedStringArray warnings = Node3D::get_configuration_warnings();
 
diff --git a/modules/openxr/scene/openxr_composition_layer.h b/modules/openxr/scene/openxr_composition_layer.h
index f683aea647..9f064379d3 100644
--- a/modules/openxr/scene/openxr_composition_layer.h
+++ b/modules/openxr/scene/openxr_composition_layer.h
@@ -49,6 +49,8 @@ class OpenXRCompositionLayer : public Node3D {
 	MeshInstance3D *fallback = nullptr;
 	bool should_update_fallback_mesh = false;
 
+	Dictionary extension_property_values;
+
 	void _create_fallback_node();
 	void _reset_fallback_material();
 
@@ -60,6 +62,9 @@ protected:
 	static void _bind_methods();
 
 	void _notification(int p_what);
+	void _get_property_list(List<PropertyInfo> *p_property_list) const;
+	bool _get(const StringName &p_property, Variant &r_value) const;
+	bool _set(const StringName &p_property, const Variant &p_value);
 
 	virtual void _on_openxr_session_begun();
 	virtual void _on_openxr_session_stopping();
@@ -84,6 +89,8 @@ public:
 
 	virtual PackedStringArray get_configuration_warnings() const override;
 
+	virtual Vector2 intersects_ray(const Vector3 &p_origin, const Vector3 &p_direction) const;
+
 	OpenXRCompositionLayer();
 	~OpenXRCompositionLayer();
 };
diff --git a/modules/openxr/scene/openxr_composition_layer_cylinder.cpp b/modules/openxr/scene/openxr_composition_layer_cylinder.cpp
index dcc9971ec9..ae5a8da5f3 100644
--- a/modules/openxr/scene/openxr_composition_layer_cylinder.cpp
+++ b/modules/openxr/scene/openxr_composition_layer_cylinder.cpp
@@ -188,3 +188,48 @@ void OpenXRCompositionLayerCylinder::set_fallback_segments(uint32_t p_fallback_s
 uint32_t OpenXRCompositionLayerCylinder::get_fallback_segments() const {
 	return fallback_segments;
 }
+
+Vector2 OpenXRCompositionLayerCylinder::intersects_ray(const Vector3 &p_origin, const Vector3 &p_direction) const {
+	Transform3D cylinder_transform = get_global_transform();
+	Vector3 cylinder_axis = cylinder_transform.basis.get_column(1);
+
+	Vector3 offset = p_origin - cylinder_transform.origin;
+	float a = p_direction.dot(p_direction - cylinder_axis * p_direction.dot(cylinder_axis));
+	float b = 2.0 * (p_direction.dot(offset - cylinder_axis * offset.dot(cylinder_axis)));
+	float c = offset.dot(offset - cylinder_axis * offset.dot(cylinder_axis)) - (radius * radius);
+
+	float discriminant = b * b - 4.0 * a * c;
+	if (discriminant < 0.0) {
+		return Vector2(-1.0, -1.0);
+	}
+
+	float t0 = (-b - Math::sqrt(discriminant)) / (2.0 * a);
+	float t1 = (-b + Math::sqrt(discriminant)) / (2.0 * a);
+	float t = MAX(t0, t1);
+
+	if (t < 0.0) {
+		return Vector2(-1.0, -1.0);
+	}
+	Vector3 intersection = p_origin + p_direction * t;
+
+	Basis correction = cylinder_transform.basis.inverse();
+	correction.rotate(Vector3(0.0, 1.0, 0.0), -Math_PI / 2.0);
+	Vector3 relative_point = correction.xform(intersection - cylinder_transform.origin);
+
+	Vector2 projected_point = Vector2(relative_point.x, relative_point.z);
+	float intersection_angle = Math::atan2(projected_point.y, projected_point.x);
+	if (Math::abs(intersection_angle) > central_angle / 2.0) {
+		return Vector2(-1.0, -1.0);
+	}
+
+	float arc_length = radius * central_angle;
+	float height = aspect_ratio * arc_length;
+	if (Math::abs(relative_point.y) > height / 2.0) {
+		return Vector2(-1.0, -1.0);
+	}
+
+	float u = 0.5 + (intersection_angle / central_angle);
+	float v = 1.0 - (0.5 + (relative_point.y / height));
+
+	return Vector2(u, v);
+}
diff --git a/modules/openxr/scene/openxr_composition_layer_cylinder.h b/modules/openxr/scene/openxr_composition_layer_cylinder.h
index dec7dac529..aed0fabd78 100644
--- a/modules/openxr/scene/openxr_composition_layer_cylinder.h
+++ b/modules/openxr/scene/openxr_composition_layer_cylinder.h
@@ -66,6 +66,8 @@ public:
 	void set_fallback_segments(uint32_t p_fallback_segments);
 	uint32_t get_fallback_segments() const;
 
+	virtual Vector2 intersects_ray(const Vector3 &p_origin, const Vector3 &p_direction) const override;
+
 	OpenXRCompositionLayerCylinder();
 	~OpenXRCompositionLayerCylinder();
 };
diff --git a/modules/openxr/scene/openxr_composition_layer_equirect.cpp b/modules/openxr/scene/openxr_composition_layer_equirect.cpp
index c911dbf6ae..d67e71443c 100644
--- a/modules/openxr/scene/openxr_composition_layer_equirect.cpp
+++ b/modules/openxr/scene/openxr_composition_layer_equirect.cpp
@@ -207,3 +207,54 @@ void OpenXRCompositionLayerEquirect::set_fallback_segments(uint32_t p_fallback_s
 uint32_t OpenXRCompositionLayerEquirect::get_fallback_segments() const {
 	return fallback_segments;
 }
+
+Vector2 OpenXRCompositionLayerEquirect::intersects_ray(const Vector3 &p_origin, const Vector3 &p_direction) const {
+	Transform3D equirect_transform = get_global_transform();
+
+	Vector3 offset = p_origin - equirect_transform.origin;
+	float a = p_direction.dot(p_direction);
+	float b = 2.0 * offset.dot(p_direction);
+	float c = offset.dot(offset) - (radius * radius);
+
+	float discriminant = b * b - 4.0 * a * c;
+	if (discriminant < 0.0) {
+		return Vector2(-1.0, -1.0);
+	}
+
+	float t0 = (-b - Math::sqrt(discriminant)) / (2.0 * a);
+	float t1 = (-b + Math::sqrt(discriminant)) / (2.0 * a);
+	float t = MAX(t0, t1);
+
+	if (t < 0.0) {
+		return Vector2(-1.0, -1.0);
+	}
+	Vector3 intersection = p_origin + p_direction * t;
+
+	Basis correction = equirect_transform.basis.inverse();
+	correction.rotate(Vector3(0.0, 1.0, 0.0), -Math_PI / 2.0);
+	Vector3 relative_point = correction.xform(intersection - equirect_transform.origin);
+
+	float horizontal_intersection_angle = Math::atan2(relative_point.z, relative_point.x);
+	if (Math::abs(horizontal_intersection_angle) > central_horizontal_angle / 2.0) {
+		return Vector2(-1.0, -1.0);
+	}
+
+	float vertical_intersection_angle = Math::acos(relative_point.y / radius) - (Math_PI / 2.0);
+	if (vertical_intersection_angle < 0) {
+		if (Math::abs(vertical_intersection_angle) > upper_vertical_angle) {
+			return Vector2(-1.0, -1.0);
+		}
+	} else if (vertical_intersection_angle > lower_vertical_angle) {
+		return Vector2(-1.0, -1.0);
+	}
+
+	// Re-center the intersection angle if the vertical angle is uneven between upper and lower.
+	if (upper_vertical_angle != lower_vertical_angle) {
+		vertical_intersection_angle -= (-upper_vertical_angle + lower_vertical_angle) / 2.0;
+	}
+
+	float u = 0.5 + (horizontal_intersection_angle / central_horizontal_angle);
+	float v = 0.5 + (vertical_intersection_angle / (upper_vertical_angle + lower_vertical_angle));
+
+	return Vector2(u, v);
+}
diff --git a/modules/openxr/scene/openxr_composition_layer_equirect.h b/modules/openxr/scene/openxr_composition_layer_equirect.h
index b74e131f35..7a002a48dc 100644
--- a/modules/openxr/scene/openxr_composition_layer_equirect.h
+++ b/modules/openxr/scene/openxr_composition_layer_equirect.h
@@ -70,6 +70,8 @@ public:
 	void set_fallback_segments(uint32_t p_fallback_segments);
 	uint32_t get_fallback_segments() const;
 
+	virtual Vector2 intersects_ray(const Vector3 &p_origin, const Vector3 &p_direction) const override;
+
 	OpenXRCompositionLayerEquirect();
 	~OpenXRCompositionLayerEquirect();
 };
diff --git a/modules/openxr/scene/openxr_composition_layer_quad.cpp b/modules/openxr/scene/openxr_composition_layer_quad.cpp
index 342df05808..17d57851e4 100644
--- a/modules/openxr/scene/openxr_composition_layer_quad.cpp
+++ b/modules/openxr/scene/openxr_composition_layer_quad.cpp
@@ -96,3 +96,36 @@ void OpenXRCompositionLayerQuad::set_quad_size(const Size2 &p_size) {
 Size2 OpenXRCompositionLayerQuad::get_quad_size() const {
 	return quad_size;
 }
+
+Vector2 OpenXRCompositionLayerQuad::intersects_ray(const Vector3 &p_origin, const Vector3 &p_direction) const {
+	Transform3D quad_transform = get_global_transform();
+	Vector3 quad_normal = quad_transform.basis.get_column(2);
+
+	float denom = quad_normal.dot(p_direction);
+	if (Math::abs(denom) > 0.0001) {
+		Vector3 vector = quad_transform.origin - p_origin;
+		float t = vector.dot(quad_normal) / denom;
+		if (t < 0.0) {
+			return Vector2(-1.0, -1.0);
+		}
+		Vector3 intersection = p_origin + p_direction * t;
+
+		Vector3 relative_point = intersection - quad_transform.origin;
+		Vector2 projected_point = Vector2(
+				relative_point.dot(quad_transform.basis.get_column(0)),
+				relative_point.dot(quad_transform.basis.get_column(1)));
+		if (Math::abs(projected_point.x) > quad_size.x / 2.0) {
+			return Vector2(-1.0, -1.0);
+		}
+		if (Math::abs(projected_point.y) > quad_size.y / 2.0) {
+			return Vector2(-1.0, -1.0);
+		}
+
+		float u = 0.5 + (projected_point.x / quad_size.x);
+		float v = 1.0 - (0.5 + (projected_point.y / quad_size.y));
+
+		return Vector2(u, v);
+	}
+
+	return Vector2(-1.0, -1.0);
+}
diff --git a/modules/openxr/scene/openxr_composition_layer_quad.h b/modules/openxr/scene/openxr_composition_layer_quad.h
index e1141586c1..d88b596984 100644
--- a/modules/openxr/scene/openxr_composition_layer_quad.h
+++ b/modules/openxr/scene/openxr_composition_layer_quad.h
@@ -54,6 +54,8 @@ public:
 	void set_quad_size(const Size2 &p_size);
 	Size2 get_quad_size() const;
 
+	virtual Vector2 intersects_ray(const Vector3 &p_origin, const Vector3 &p_direction) const override;
+
 	OpenXRCompositionLayerQuad();
 	~OpenXRCompositionLayerQuad();
 };
diff --git a/modules/regex/tests/test_regex.h b/modules/regex/tests/test_regex.h
index 1c305f70f7..af58e2487b 100644
--- a/modules/regex/tests/test_regex.h
+++ b/modules/regex/tests/test_regex.h
@@ -184,6 +184,48 @@ TEST_CASE("[RegEx] Empty pattern") {
 	CHECK(re.is_valid());
 }
 
+TEST_CASE("[RegEx] Complex Grouping") {
+	const String test = "https://docs.godotengine.org/en/latest/contributing/";
+
+	// Ignored protocol in grouping.
+	RegEx re("^(?:https?://)([a-zA-Z]{2,4})\\.([a-zA-Z][a-zA-Z0-9_\\-]{2,64})\\.([a-zA-Z]{2,4})");
+	REQUIRE(re.is_valid());
+	Ref<RegExMatch> expr = re.search(test);
+
+	CHECK(expr->get_group_count() == 3);
+
+	CHECK(expr->get_string(0) == "https://docs.godotengine.org");
+
+	CHECK(expr->get_string(1) == "docs");
+	CHECK(expr->get_string(2) == "godotengine");
+	CHECK(expr->get_string(3) == "org");
+}
+
+TEST_CASE("[RegEx] Number Expression") {
+	const String test = "(2.5e-3 + 35 + 46) / 2.8e0 = 28.9294642857";
+
+	// Not an exact regex for number but a good test.
+	RegEx re("([+-]?\\d+)(\\.\\d+([eE][+-]?\\d+)?)?");
+	REQUIRE(re.is_valid());
+	Array number_match = re.search_all(test);
+
+	CHECK(number_match.size() == 5);
+
+	Ref<RegExMatch> number = number_match[0];
+	CHECK(number->get_string(0) == "2.5e-3");
+	CHECK(number->get_string(1) == "2");
+	number = number_match[1];
+	CHECK(number->get_string(0) == "35");
+	number = number_match[2];
+	CHECK(number->get_string(0) == "46");
+	number = number_match[3];
+	CHECK(number->get_string(0) == "2.8e0");
+	number = number_match[4];
+	CHECK(number->get_string(0) == "28.9294642857");
+	CHECK(number->get_string(1) == "28");
+	CHECK(number->get_string(2) == ".9294642857");
+}
+
 TEST_CASE("[RegEx] Invalid end position") {
 	const String s = "Godot";
 
diff --git a/modules/text_server_adv/SCsub b/modules/text_server_adv/SCsub
index b04ad77bc9..15695476de 100644
--- a/modules/text_server_adv/SCsub
+++ b/modules/text_server_adv/SCsub
@@ -117,6 +117,7 @@ if env["builtin_harfbuzz"]:
         "src/hb-subset-cff1.cc",
         "src/hb-subset-cff2.cc",
         "src/hb-subset-input.cc",
+        "src/hb-subset-instancer-iup.cc",
         "src/hb-subset-instancer-solver.cc",
         "src/hb-subset-plan.cc",
         "src/hb-subset-repacker.cc",
diff --git a/modules/text_server_adv/gdextension_build/SConstruct b/modules/text_server_adv/gdextension_build/SConstruct
index 1f27f5ade9..1d9d36fbbf 100644
--- a/modules/text_server_adv/gdextension_build/SConstruct
+++ b/modules/text_server_adv/gdextension_build/SConstruct
@@ -357,6 +357,7 @@ thirdparty_harfbuzz_sources = [
     "src/hb-subset-cff1.cc",
     "src/hb-subset-cff2.cc",
     "src/hb-subset-input.cc",
+    "src/hb-subset-instancer-iup.cc",
     "src/hb-subset-instancer-solver.cc",
     "src/hb-subset-plan.cc",
     "src/hb-subset-repacker.cc",
diff --git a/modules/text_server_adv/text_server_adv.cpp b/modules/text_server_adv/text_server_adv.cpp
index c092fd7511..1ed335fe99 100644
--- a/modules/text_server_adv/text_server_adv.cpp
+++ b/modules/text_server_adv/text_server_adv.cpp
@@ -958,7 +958,7 @@ void TextServerAdvanced::_generateMTSDF_threaded(void *p_td, uint32_t p_y) {
 	}
 }
 
-_FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_msdf(FontAdvanced *p_font_data, FontForSizeAdvanced *p_data, int p_pixel_range, int p_rect_margin, FT_Outline *outline, const Vector2 &advance) const {
+_FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_msdf(FontAdvanced *p_font_data, FontForSizeAdvanced *p_data, int p_pixel_range, int p_rect_margin, FT_Outline *p_outline, const Vector2 &p_advance) const {
 	msdfgen::Shape shape;
 
 	shape.contours.clear();
@@ -974,13 +974,13 @@ _FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_msdf(
 	ft_functions.shift = 0;
 	ft_functions.delta = 0;
 
-	int error = FT_Outline_Decompose(outline, &ft_functions, &context);
+	int error = FT_Outline_Decompose(p_outline, &ft_functions, &context);
 	ERR_FAIL_COND_V_MSG(error, FontGlyph(), "FreeType: Outline decomposition error: '" + String(FT_Error_String(error)) + "'.");
 	if (!shape.contours.empty() && shape.contours.back().edges.empty()) {
 		shape.contours.pop_back();
 	}
 
-	if (FT_Outline_Get_Orientation(outline) == 1) {
+	if (FT_Outline_Get_Orientation(p_outline) == 1) {
 		for (int i = 0; i < (int)shape.contours.size(); ++i) {
 			shape.contours[i].reverse();
 		}
@@ -993,12 +993,19 @@ _FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_msdf(
 
 	FontGlyph chr;
 	chr.found = true;
-	chr.advance = advance;
+	chr.advance = p_advance;
 
 	if (shape.validate() && shape.contours.size() > 0) {
 		int w = (bounds.r - bounds.l);
 		int h = (bounds.t - bounds.b);
 
+		if (w == 0 || h == 0) {
+			chr.texture_idx = -1;
+			chr.uv_rect = Rect2();
+			chr.rect = Rect2();
+			return chr;
+		}
+
 		int mw = w + p_rect_margin * 4;
 		int mh = h + p_rect_margin * 4;
 
@@ -1056,12 +1063,24 @@ _FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_msdf(
 #endif
 
 #ifdef MODULE_FREETYPE_ENABLED
-_FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_bitmap(FontForSizeAdvanced *p_data, int p_rect_margin, FT_Bitmap bitmap, int yofs, int xofs, const Vector2 &advance, bool p_bgra) const {
-	int w = bitmap.width;
-	int h = bitmap.rows;
+_FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_bitmap(FontForSizeAdvanced *p_data, int p_rect_margin, FT_Bitmap p_bitmap, int p_yofs, int p_xofs, const Vector2 &p_advance, bool p_bgra) const {
+	FontGlyph chr;
+	chr.advance = p_advance * p_data->scale / p_data->oversampling;
+	chr.found = true;
+
+	int w = p_bitmap.width;
+	int h = p_bitmap.rows;
+
+	if (w == 0 || h == 0) {
+		chr.texture_idx = -1;
+		chr.uv_rect = Rect2();
+		chr.rect = Rect2();
+		return chr;
+	}
+
 	int color_size = 2;
 
-	switch (bitmap.pixel_mode) {
+	switch (p_bitmap.pixel_mode) {
 		case FT_PIXEL_MODE_MONO:
 		case FT_PIXEL_MODE_GRAY: {
 			color_size = 2;
@@ -1100,54 +1119,54 @@ _FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_bitma
 			for (int j = 0; j < w; j++) {
 				int ofs = ((i + tex_pos.y + p_rect_margin * 2) * tex.texture_w + j + tex_pos.x + p_rect_margin * 2) * color_size;
 				ERR_FAIL_COND_V(ofs >= tex.image->data_size(), FontGlyph());
-				switch (bitmap.pixel_mode) {
+				switch (p_bitmap.pixel_mode) {
 					case FT_PIXEL_MODE_MONO: {
-						int byte = i * bitmap.pitch + (j >> 3);
+						int byte = i * p_bitmap.pitch + (j >> 3);
 						int bit = 1 << (7 - (j % 8));
 						wr[ofs + 0] = 255; // grayscale as 1
-						wr[ofs + 1] = (bitmap.buffer[byte] & bit) ? 255 : 0;
+						wr[ofs + 1] = (p_bitmap.buffer[byte] & bit) ? 255 : 0;
 					} break;
 					case FT_PIXEL_MODE_GRAY:
 						wr[ofs + 0] = 255; // grayscale as 1
-						wr[ofs + 1] = bitmap.buffer[i * bitmap.pitch + j];
+						wr[ofs + 1] = p_bitmap.buffer[i * p_bitmap.pitch + j];
 						break;
 					case FT_PIXEL_MODE_BGRA: {
-						int ofs_color = i * bitmap.pitch + (j << 2);
-						wr[ofs + 2] = bitmap.buffer[ofs_color + 0];
-						wr[ofs + 1] = bitmap.buffer[ofs_color + 1];
-						wr[ofs + 0] = bitmap.buffer[ofs_color + 2];
-						wr[ofs + 3] = bitmap.buffer[ofs_color + 3];
+						int ofs_color = i * p_bitmap.pitch + (j << 2);
+						wr[ofs + 2] = p_bitmap.buffer[ofs_color + 0];
+						wr[ofs + 1] = p_bitmap.buffer[ofs_color + 1];
+						wr[ofs + 0] = p_bitmap.buffer[ofs_color + 2];
+						wr[ofs + 3] = p_bitmap.buffer[ofs_color + 3];
 					} break;
 					case FT_PIXEL_MODE_LCD: {
-						int ofs_color = i * bitmap.pitch + (j * 3);
+						int ofs_color = i * p_bitmap.pitch + (j * 3);
 						if (p_bgra) {
-							wr[ofs + 0] = bitmap.buffer[ofs_color + 2];
-							wr[ofs + 1] = bitmap.buffer[ofs_color + 1];
-							wr[ofs + 2] = bitmap.buffer[ofs_color + 0];
+							wr[ofs + 0] = p_bitmap.buffer[ofs_color + 2];
+							wr[ofs + 1] = p_bitmap.buffer[ofs_color + 1];
+							wr[ofs + 2] = p_bitmap.buffer[ofs_color + 0];
 							wr[ofs + 3] = 255;
 						} else {
-							wr[ofs + 0] = bitmap.buffer[ofs_color + 0];
-							wr[ofs + 1] = bitmap.buffer[ofs_color + 1];
-							wr[ofs + 2] = bitmap.buffer[ofs_color + 2];
+							wr[ofs + 0] = p_bitmap.buffer[ofs_color + 0];
+							wr[ofs + 1] = p_bitmap.buffer[ofs_color + 1];
+							wr[ofs + 2] = p_bitmap.buffer[ofs_color + 2];
 							wr[ofs + 3] = 255;
 						}
 					} break;
 					case FT_PIXEL_MODE_LCD_V: {
-						int ofs_color = i * bitmap.pitch * 3 + j;
+						int ofs_color = i * p_bitmap.pitch * 3 + j;
 						if (p_bgra) {
-							wr[ofs + 0] = bitmap.buffer[ofs_color + bitmap.pitch * 2];
-							wr[ofs + 1] = bitmap.buffer[ofs_color + bitmap.pitch];
-							wr[ofs + 2] = bitmap.buffer[ofs_color + 0];
+							wr[ofs + 0] = p_bitmap.buffer[ofs_color + p_bitmap.pitch * 2];
+							wr[ofs + 1] = p_bitmap.buffer[ofs_color + p_bitmap.pitch];
+							wr[ofs + 2] = p_bitmap.buffer[ofs_color + 0];
 							wr[ofs + 3] = 255;
 						} else {
-							wr[ofs + 0] = bitmap.buffer[ofs_color + 0];
-							wr[ofs + 1] = bitmap.buffer[ofs_color + bitmap.pitch];
-							wr[ofs + 2] = bitmap.buffer[ofs_color + bitmap.pitch * 2];
+							wr[ofs + 0] = p_bitmap.buffer[ofs_color + 0];
+							wr[ofs + 1] = p_bitmap.buffer[ofs_color + p_bitmap.pitch];
+							wr[ofs + 2] = p_bitmap.buffer[ofs_color + p_bitmap.pitch * 2];
 							wr[ofs + 3] = 255;
 						}
 					} break;
 					default:
-						ERR_FAIL_V_MSG(FontGlyph(), "Font uses unsupported pixel format: " + String::num_int64(bitmap.pixel_mode) + ".");
+						ERR_FAIL_V_MSG(FontGlyph(), "Font uses unsupported pixel format: " + String::num_int64(p_bitmap.pixel_mode) + ".");
 						break;
 				}
 			}
@@ -1156,13 +1175,10 @@ _FORCE_INLINE_ TextServerAdvanced::FontGlyph TextServerAdvanced::rasterize_bitma
 
 	tex.dirty = true;
 
-	FontGlyph chr;
-	chr.advance = advance * p_data->scale / p_data->oversampling;
 	chr.texture_idx = tex_pos.index;
-	chr.found = true;
 
 	chr.uv_rect = Rect2(tex_pos.x + p_rect_margin, tex_pos.y + p_rect_margin, w + p_rect_margin * 2, h + p_rect_margin * 2);
-	chr.rect.position = Vector2(xofs - p_rect_margin, -yofs - p_rect_margin) * p_data->scale / p_data->oversampling;
+	chr.rect.position = Vector2(p_xofs - p_rect_margin, -p_yofs - p_rect_margin) * p_data->scale / p_data->oversampling;
 	chr.rect.size = chr.uv_rect.size * p_data->scale / p_data->oversampling;
 	return chr;
 }
@@ -3619,9 +3635,6 @@ void TextServerAdvanced::_font_draw_glyph(const RID &p_font_rid, const RID &p_ca
 
 	const FontGlyph &gl = fd->cache[size]->glyph_map[index];
 	if (gl.found) {
-		if (gl.uv_rect.size.x <= 2 || gl.uv_rect.size.y <= 2) {
-			return; // Nothing to draw.
-		}
 		ERR_FAIL_COND(gl.texture_idx < -1 || gl.texture_idx >= fd->cache[size]->textures.size());
 
 		if (gl.texture_idx != -1) {
@@ -3730,9 +3743,6 @@ void TextServerAdvanced::_font_draw_glyph_outline(const RID &p_font_rid, const R
 
 	const FontGlyph &gl = fd->cache[size]->glyph_map[index];
 	if (gl.found) {
-		if (gl.uv_rect.size.x <= 2 || gl.uv_rect.size.y <= 2) {
-			return; // Nothing to draw.
-		}
 		ERR_FAIL_COND(gl.texture_idx < -1 || gl.texture_idx >= fd->cache[size]->textures.size());
 
 		if (gl.texture_idx != -1) {
diff --git a/modules/text_server_adv/text_server_adv.h b/modules/text_server_adv/text_server_adv.h
index 160eb27528..1cd73a6999 100644
--- a/modules/text_server_adv/text_server_adv.h
+++ b/modules/text_server_adv/text_server_adv.h
@@ -354,10 +354,10 @@ class TextServerAdvanced : public TextServerExtension {
 
 	_FORCE_INLINE_ FontTexturePosition find_texture_pos_for_glyph(FontForSizeAdvanced *p_data, int p_color_size, Image::Format p_image_format, int p_width, int p_height, bool p_msdf) const;
 #ifdef MODULE_MSDFGEN_ENABLED
-	_FORCE_INLINE_ FontGlyph rasterize_msdf(FontAdvanced *p_font_data, FontForSizeAdvanced *p_data, int p_pixel_range, int p_rect_margin, FT_Outline *outline, const Vector2 &advance) const;
+	_FORCE_INLINE_ FontGlyph rasterize_msdf(FontAdvanced *p_font_data, FontForSizeAdvanced *p_data, int p_pixel_range, int p_rect_margin, FT_Outline *p_outline, const Vector2 &p_advance) const;
 #endif
 #ifdef MODULE_FREETYPE_ENABLED
-	_FORCE_INLINE_ FontGlyph rasterize_bitmap(FontForSizeAdvanced *p_data, int p_rect_margin, FT_Bitmap bitmap, int yofs, int xofs, const Vector2 &advance, bool p_bgra) const;
+	_FORCE_INLINE_ FontGlyph rasterize_bitmap(FontForSizeAdvanced *p_data, int p_rect_margin, FT_Bitmap p_bitmap, int p_yofs, int p_xofs, const Vector2 &p_advance, bool p_bgra) const;
 #endif
 	_FORCE_INLINE_ bool _ensure_glyph(FontAdvanced *p_font_data, const Vector2i &p_size, int32_t p_glyph) const;
 	_FORCE_INLINE_ bool _ensure_cache_for_size(FontAdvanced *p_font_data, const Vector2i &p_size) const;
diff --git a/modules/text_server_fb/text_server_fb.cpp b/modules/text_server_fb/text_server_fb.cpp
index c3f62d7324..c62f308818 100644
--- a/modules/text_server_fb/text_server_fb.cpp
+++ b/modules/text_server_fb/text_server_fb.cpp
@@ -394,7 +394,7 @@ void TextServerFallback::_generateMTSDF_threaded(void *p_td, uint32_t p_y) {
 	}
 }
 
-_FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_msdf(FontFallback *p_font_data, FontForSizeFallback *p_data, int p_pixel_range, int p_rect_margin, FT_Outline *outline, const Vector2 &advance) const {
+_FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_msdf(FontFallback *p_font_data, FontForSizeFallback *p_data, int p_pixel_range, int p_rect_margin, FT_Outline *p_outline, const Vector2 &p_advance) const {
 	msdfgen::Shape shape;
 
 	shape.contours.clear();
@@ -410,13 +410,13 @@ _FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_msdf(
 	ft_functions.shift = 0;
 	ft_functions.delta = 0;
 
-	int error = FT_Outline_Decompose(outline, &ft_functions, &context);
+	int error = FT_Outline_Decompose(p_outline, &ft_functions, &context);
 	ERR_FAIL_COND_V_MSG(error, FontGlyph(), "FreeType: Outline decomposition error: '" + String(FT_Error_String(error)) + "'.");
 	if (!shape.contours.empty() && shape.contours.back().edges.empty()) {
 		shape.contours.pop_back();
 	}
 
-	if (FT_Outline_Get_Orientation(outline) == 1) {
+	if (FT_Outline_Get_Orientation(p_outline) == 1) {
 		for (int i = 0; i < (int)shape.contours.size(); ++i) {
 			shape.contours[i].reverse();
 		}
@@ -429,12 +429,18 @@ _FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_msdf(
 
 	FontGlyph chr;
 	chr.found = true;
-	chr.advance = advance;
+	chr.advance = p_advance;
 
 	if (shape.validate() && shape.contours.size() > 0) {
 		int w = (bounds.r - bounds.l);
 		int h = (bounds.t - bounds.b);
 
+		if (w == 0 || h == 0) {
+			chr.texture_idx = -1;
+			chr.uv_rect = Rect2();
+			chr.rect = Rect2();
+			return chr;
+		}
 		int mw = w + p_rect_margin * 4;
 		int mh = h + p_rect_margin * 4;
 
@@ -491,12 +497,24 @@ _FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_msdf(
 #endif
 
 #ifdef MODULE_FREETYPE_ENABLED
-_FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_bitmap(FontForSizeFallback *p_data, int p_rect_margin, FT_Bitmap bitmap, int yofs, int xofs, const Vector2 &advance, bool p_bgra) const {
-	int w = bitmap.width;
-	int h = bitmap.rows;
+_FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_bitmap(FontForSizeFallback *p_data, int p_rect_margin, FT_Bitmap p_bitmap, int p_yofs, int p_xofs, const Vector2 &p_advance, bool p_bgra) const {
+	FontGlyph chr;
+	chr.advance = p_advance * p_data->scale / p_data->oversampling;
+	chr.found = true;
+
+	int w = p_bitmap.width;
+	int h = p_bitmap.rows;
+
+	if (w == 0 || h == 0) {
+		chr.texture_idx = -1;
+		chr.uv_rect = Rect2();
+		chr.rect = Rect2();
+		return chr;
+	}
+
 	int color_size = 2;
 
-	switch (bitmap.pixel_mode) {
+	switch (p_bitmap.pixel_mode) {
 		case FT_PIXEL_MODE_MONO:
 		case FT_PIXEL_MODE_GRAY: {
 			color_size = 2;
@@ -535,54 +553,54 @@ _FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_bitma
 			for (int j = 0; j < w; j++) {
 				int ofs = ((i + tex_pos.y + p_rect_margin * 2) * tex.texture_w + j + tex_pos.x + p_rect_margin * 2) * color_size;
 				ERR_FAIL_COND_V(ofs >= tex.image->data_size(), FontGlyph());
-				switch (bitmap.pixel_mode) {
+				switch (p_bitmap.pixel_mode) {
 					case FT_PIXEL_MODE_MONO: {
-						int byte = i * bitmap.pitch + (j >> 3);
+						int byte = i * p_bitmap.pitch + (j >> 3);
 						int bit = 1 << (7 - (j % 8));
 						wr[ofs + 0] = 255; // grayscale as 1
-						wr[ofs + 1] = (bitmap.buffer[byte] & bit) ? 255 : 0;
+						wr[ofs + 1] = (p_bitmap.buffer[byte] & bit) ? 255 : 0;
 					} break;
 					case FT_PIXEL_MODE_GRAY:
 						wr[ofs + 0] = 255; // grayscale as 1
-						wr[ofs + 1] = bitmap.buffer[i * bitmap.pitch + j];
+						wr[ofs + 1] = p_bitmap.buffer[i * p_bitmap.pitch + j];
 						break;
 					case FT_PIXEL_MODE_BGRA: {
-						int ofs_color = i * bitmap.pitch + (j << 2);
-						wr[ofs + 2] = bitmap.buffer[ofs_color + 0];
-						wr[ofs + 1] = bitmap.buffer[ofs_color + 1];
-						wr[ofs + 0] = bitmap.buffer[ofs_color + 2];
-						wr[ofs + 3] = bitmap.buffer[ofs_color + 3];
+						int ofs_color = i * p_bitmap.pitch + (j << 2);
+						wr[ofs + 2] = p_bitmap.buffer[ofs_color + 0];
+						wr[ofs + 1] = p_bitmap.buffer[ofs_color + 1];
+						wr[ofs + 0] = p_bitmap.buffer[ofs_color + 2];
+						wr[ofs + 3] = p_bitmap.buffer[ofs_color + 3];
 					} break;
 					case FT_PIXEL_MODE_LCD: {
-						int ofs_color = i * bitmap.pitch + (j * 3);
+						int ofs_color = i * p_bitmap.pitch + (j * 3);
 						if (p_bgra) {
-							wr[ofs + 0] = bitmap.buffer[ofs_color + 2];
-							wr[ofs + 1] = bitmap.buffer[ofs_color + 1];
-							wr[ofs + 2] = bitmap.buffer[ofs_color + 0];
+							wr[ofs + 0] = p_bitmap.buffer[ofs_color + 2];
+							wr[ofs + 1] = p_bitmap.buffer[ofs_color + 1];
+							wr[ofs + 2] = p_bitmap.buffer[ofs_color + 0];
 							wr[ofs + 3] = 255;
 						} else {
-							wr[ofs + 0] = bitmap.buffer[ofs_color + 0];
-							wr[ofs + 1] = bitmap.buffer[ofs_color + 1];
-							wr[ofs + 2] = bitmap.buffer[ofs_color + 2];
+							wr[ofs + 0] = p_bitmap.buffer[ofs_color + 0];
+							wr[ofs + 1] = p_bitmap.buffer[ofs_color + 1];
+							wr[ofs + 2] = p_bitmap.buffer[ofs_color + 2];
 							wr[ofs + 3] = 255;
 						}
 					} break;
 					case FT_PIXEL_MODE_LCD_V: {
-						int ofs_color = i * bitmap.pitch * 3 + j;
+						int ofs_color = i * p_bitmap.pitch * 3 + j;
 						if (p_bgra) {
-							wr[ofs + 0] = bitmap.buffer[ofs_color + bitmap.pitch * 2];
-							wr[ofs + 1] = bitmap.buffer[ofs_color + bitmap.pitch];
-							wr[ofs + 2] = bitmap.buffer[ofs_color + 0];
+							wr[ofs + 0] = p_bitmap.buffer[ofs_color + p_bitmap.pitch * 2];
+							wr[ofs + 1] = p_bitmap.buffer[ofs_color + p_bitmap.pitch];
+							wr[ofs + 2] = p_bitmap.buffer[ofs_color + 0];
 							wr[ofs + 3] = 255;
 						} else {
-							wr[ofs + 0] = bitmap.buffer[ofs_color + 0];
-							wr[ofs + 1] = bitmap.buffer[ofs_color + bitmap.pitch];
-							wr[ofs + 2] = bitmap.buffer[ofs_color + bitmap.pitch * 2];
+							wr[ofs + 0] = p_bitmap.buffer[ofs_color + 0];
+							wr[ofs + 1] = p_bitmap.buffer[ofs_color + p_bitmap.pitch];
+							wr[ofs + 2] = p_bitmap.buffer[ofs_color + p_bitmap.pitch * 2];
 							wr[ofs + 3] = 255;
 						}
 					} break;
 					default:
-						ERR_FAIL_V_MSG(FontGlyph(), "Font uses unsupported pixel format: " + String::num_int64(bitmap.pixel_mode) + ".");
+						ERR_FAIL_V_MSG(FontGlyph(), "Font uses unsupported pixel format: " + String::num_int64(p_bitmap.pixel_mode) + ".");
 						break;
 				}
 			}
@@ -591,13 +609,10 @@ _FORCE_INLINE_ TextServerFallback::FontGlyph TextServerFallback::rasterize_bitma
 
 	tex.dirty = true;
 
-	FontGlyph chr;
-	chr.advance = advance * p_data->scale / p_data->oversampling;
 	chr.texture_idx = tex_pos.index;
-	chr.found = true;
 
 	chr.uv_rect = Rect2(tex_pos.x + p_rect_margin, tex_pos.y + p_rect_margin, w + p_rect_margin * 2, h + p_rect_margin * 2);
-	chr.rect.position = Vector2(xofs - p_rect_margin, -yofs - p_rect_margin) * p_data->scale / p_data->oversampling;
+	chr.rect.position = Vector2(p_xofs - p_rect_margin, -p_yofs - p_rect_margin) * p_data->scale / p_data->oversampling;
 	chr.rect.size = chr.uv_rect.size * p_data->scale / p_data->oversampling;
 	return chr;
 }
@@ -2567,9 +2582,6 @@ void TextServerFallback::_font_draw_glyph(const RID &p_font_rid, const RID &p_ca
 
 	const FontGlyph &gl = fd->cache[size]->glyph_map[index];
 	if (gl.found) {
-		if (gl.uv_rect.size.x <= 2 || gl.uv_rect.size.y <= 2) {
-			return; // Nothing to draw.
-		}
 		ERR_FAIL_COND(gl.texture_idx < -1 || gl.texture_idx >= fd->cache[size]->textures.size());
 
 		if (gl.texture_idx != -1) {
@@ -2678,9 +2690,6 @@ void TextServerFallback::_font_draw_glyph_outline(const RID &p_font_rid, const R
 
 	const FontGlyph &gl = fd->cache[size]->glyph_map[index];
 	if (gl.found) {
-		if (gl.uv_rect.size.x <= 2 || gl.uv_rect.size.y <= 2) {
-			return; // Nothing to draw.
-		}
 		ERR_FAIL_COND(gl.texture_idx < -1 || gl.texture_idx >= fd->cache[size]->textures.size());
 
 		if (gl.texture_idx != -1) {
diff --git a/modules/text_server_fb/text_server_fb.h b/modules/text_server_fb/text_server_fb.h
index 9fd5af8b51..31370c7da7 100644
--- a/modules/text_server_fb/text_server_fb.h
+++ b/modules/text_server_fb/text_server_fb.h
@@ -303,10 +303,10 @@ class TextServerFallback : public TextServerExtension {
 
 	_FORCE_INLINE_ FontTexturePosition find_texture_pos_for_glyph(FontForSizeFallback *p_data, int p_color_size, Image::Format p_image_format, int p_width, int p_height, bool p_msdf) const;
 #ifdef MODULE_MSDFGEN_ENABLED
-	_FORCE_INLINE_ FontGlyph rasterize_msdf(FontFallback *p_font_data, FontForSizeFallback *p_data, int p_pixel_range, int p_rect_margin, FT_Outline *outline, const Vector2 &advance) const;
+	_FORCE_INLINE_ FontGlyph rasterize_msdf(FontFallback *p_font_data, FontForSizeFallback *p_data, int p_pixel_range, int p_rect_margin, FT_Outline *p_outline, const Vector2 &p_advance) const;
 #endif
 #ifdef MODULE_FREETYPE_ENABLED
-	_FORCE_INLINE_ FontGlyph rasterize_bitmap(FontForSizeFallback *p_data, int p_rect_margin, FT_Bitmap bitmap, int yofs, int xofs, const Vector2 &advance, bool p_bgra) const;
+	_FORCE_INLINE_ FontGlyph rasterize_bitmap(FontForSizeFallback *p_data, int p_rect_margin, FT_Bitmap p_bitmap, int p_yofs, int p_xofs, const Vector2 &p_advance, bool p_bgra) const;
 #endif
 	_FORCE_INLINE_ bool _ensure_glyph(FontFallback *p_font_data, const Vector2i &p_size, int32_t p_glyph) const;
 	_FORCE_INLINE_ bool _ensure_cache_for_size(FontFallback *p_font_data, const Vector2i &p_size) const;
diff --git a/modules/zip/zip_reader.cpp b/modules/zip/zip_reader.cpp
index f4a92dce5b..123d1e5d46 100644
--- a/modules/zip/zip_reader.cpp
+++ b/modules/zip/zip_reader.cpp
@@ -58,7 +58,14 @@ Error ZIPReader::close() {
 PackedStringArray ZIPReader::get_files() {
 	ERR_FAIL_COND_V_MSG(fa.is_null(), PackedStringArray(), "ZIPReader must be opened before use.");
 
-	int err = unzGoToFirstFile(uzf);
+	unz_global_info gi;
+	int err = unzGetGlobalInfo(uzf, &gi);
+	ERR_FAIL_COND_V(err != UNZ_OK, PackedStringArray());
+	if (gi.number_entry == 0) {
+		return PackedStringArray();
+	}
+
+	err = unzGoToFirstFile(uzf);
 	ERR_FAIL_COND_V(err != UNZ_OK, PackedStringArray());
 
 	List<String> s;
diff --git a/platform/android/doc_classes/EditorExportPlatformAndroid.xml b/platform/android/doc_classes/EditorExportPlatformAndroid.xml
index 7fce5359ae..341d14a3b1 100644
--- a/platform/android/doc_classes/EditorExportPlatformAndroid.xml
+++ b/platform/android/doc_classes/EditorExportPlatformAndroid.xml
@@ -380,6 +380,9 @@
 		<member name="permissions/persistent_activity" type="bool" setter="" getter="" deprecated="Deprecated in API level 15.">
 			Allow an application to make its activities persistent.
 		</member>
+		<member name="permissions/post_notifications" type="bool" setter="" getter="" deprecated="">
+			Allow an application to post notifications. Added in API level 33. See [url=https://developer.android.com/develop/ui/views/notifications/notification-permission]Notification runtime permission[/url].
+		</member>
 		<member name="permissions/process_outgoing_calls" type="bool" setter="" getter="" deprecated="Deprecated in API level 29.">
 			Allows an application to see the number being dialed during an outgoing call with the option to redirect the call to a different number or abort the call altogether. See [url=https://developer.android.com/reference/android/Manifest.permission#PROCESS_OUTGOING_CALLS]PROCESS_OUTGOING_CALLS[/url].
 		</member>
@@ -595,6 +598,7 @@
 			Application version visible to the user. Falls back to [member ProjectSettings.application/config/version] if left empty.
 		</member>
 		<member name="xr_features/xr_mode" type="int" setter="" getter="">
+			The extended reality (XR) mode for this application.
 		</member>
 	</members>
 </class>
diff --git a/platform/android/export/export_plugin.cpp b/platform/android/export/export_plugin.cpp
index 19e215bfaf..b19760026a 100644
--- a/platform/android/export/export_plugin.cpp
+++ b/platform/android/export/export_plugin.cpp
@@ -141,6 +141,7 @@ static const char *android_perms[] = {
 	"MOUNT_UNMOUNT_FILESYSTEMS",
 	"NFC",
 	"PERSISTENT_ACTIVITY",
+	"POST_NOTIFICATIONS",
 	"PROCESS_OUTGOING_CALLS",
 	"READ_CALENDAR",
 	"READ_CALL_LOG",
@@ -1938,6 +1939,12 @@ bool EditorExportPlatformAndroid::get_export_option_visibility(const EditorExpor
 		// The APK templates are ignored if Gradle build is enabled.
 		return advanced_options_enabled && !bool(p_preset->get("gradle_build/use_gradle_build"));
 	}
+
+	// Hide .NET embedding option (always enabled).
+	if (p_option == "dotnet/embed_build_outputs") {
+		return false;
+	}
+
 	return true;
 }
 
@@ -2651,7 +2658,7 @@ String EditorExportPlatformAndroid::get_apk_expansion_fullpath(const Ref<EditorE
 
 Error EditorExportPlatformAndroid::save_apk_expansion_file(const Ref<EditorExportPreset> &p_preset, bool p_debug, const String &p_path) {
 	String fullpath = get_apk_expansion_fullpath(p_preset, p_path);
-	Error err = save_pack(false, p_preset, p_debug, fullpath);
+	Error err = save_pack(p_preset, p_debug, fullpath);
 	return err;
 }
 
@@ -3095,9 +3102,9 @@ Error EditorExportPlatformAndroid::export_project_helper(const Ref<EditorExportP
 			user_data.libs_directory = gradle_build_directory.path_join("libs");
 			user_data.debug = p_debug;
 			if (p_flags & DEBUG_FLAG_DUMB_CLIENT) {
-				err = export_project_files(true, p_preset, p_debug, ignore_apk_file, &user_data, copy_gradle_so);
+				err = export_project_files(p_preset, p_debug, ignore_apk_file, &user_data, copy_gradle_so);
 			} else {
-				err = export_project_files(true, p_preset, p_debug, rename_and_store_file_in_gradle_project, &user_data, copy_gradle_so);
+				err = export_project_files(p_preset, p_debug, rename_and_store_file_in_gradle_project, &user_data, copy_gradle_so);
 			}
 			if (err != OK) {
 				add_message(EXPORT_MESSAGE_ERROR, TTR("Export"), TTR("Could not export project files to gradle project."));
@@ -3482,7 +3489,7 @@ Error EditorExportPlatformAndroid::export_project_helper(const Ref<EditorExportP
 		APKExportData ed;
 		ed.ep = &ep;
 		ed.apk = unaligned_apk;
-		err = export_project_files(true, p_preset, p_debug, ignore_apk_file, &ed, save_apk_so);
+		err = export_project_files(p_preset, p_debug, ignore_apk_file, &ed, save_apk_so);
 	} else {
 		if (apk_expansion) {
 			err = save_apk_expansion_file(p_preset, p_debug, p_path);
@@ -3494,7 +3501,7 @@ Error EditorExportPlatformAndroid::export_project_helper(const Ref<EditorExportP
 			APKExportData ed;
 			ed.ep = &ep;
 			ed.apk = unaligned_apk;
-			err = export_project_files(true, p_preset, p_debug, save_apk_file, &ed, save_apk_so);
+			err = export_project_files(p_preset, p_debug, save_apk_file, &ed, save_apk_so);
 		}
 	}
 
diff --git a/platform/android/os_android.cpp b/platform/android/os_android.cpp
index 82e7fdb320..bf6b7a7372 100644
--- a/platform/android/os_android.cpp
+++ b/platform/android/os_android.cpp
@@ -162,7 +162,7 @@ Vector<String> OS_Android::get_granted_permissions() const {
 	return godot_java->get_granted_permissions();
 }
 
-Error OS_Android::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path) {
+Error OS_Android::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path, bool p_generate_temp_files) {
 	String path = p_path;
 	bool so_file_exists = true;
 	if (!FileAccess::exists(path)) {
diff --git a/platform/android/os_android.h b/platform/android/os_android.h
index 31ee7389df..e01d759494 100644
--- a/platform/android/os_android.h
+++ b/platform/android/os_android.h
@@ -113,7 +113,7 @@ public:
 
 	virtual void alert(const String &p_alert, const String &p_title) override;
 
-	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr) override;
+	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr, bool p_generate_temp_files = false) override;
 
 	virtual String get_name() const override;
 	virtual String get_distribution_name() const override;
diff --git a/platform/ios/doc_classes/EditorExportPlatformIOS.xml b/platform/ios/doc_classes/EditorExportPlatformIOS.xml
index 35ef6d6a78..0c0ded5fea 100644
--- a/platform/ios/doc_classes/EditorExportPlatformIOS.xml
+++ b/platform/ios/doc_classes/EditorExportPlatformIOS.xml
@@ -12,7 +12,7 @@
 	<members>
 		<member name="application/additional_plist_content" type="String" setter="" getter="">
 			Additional data added to the root [code]&lt;dict&gt;[/code] section of the [url=https://developer.apple.com/documentation/bundleresources/information_property_list]Info.plist[/url] file. The value should be an XML section with pairs of key-value elements, e.g.:
-			[codeblock]
+			[codeblock lang=text]
 			&lt;key&gt;key_name&lt;/key&gt;
 			&lt;string&gt;value&lt;/string&gt;
 			[/codeblock]
diff --git a/platform/ios/export/export_plugin.cpp b/platform/ios/export/export_plugin.cpp
index 82b4f6f904..33389129b7 100644
--- a/platform/ios/export/export_plugin.cpp
+++ b/platform/ios/export/export_plugin.cpp
@@ -135,6 +135,11 @@ void EditorExportPlatformIOS::_notification(int p_what) {
 }
 
 bool EditorExportPlatformIOS::get_export_option_visibility(const EditorExportPreset *p_preset, const String &p_option) const {
+	// Hide unsupported .NET embedding option.
+	if (p_option == "dotnet/embed_build_outputs") {
+		return false;
+	}
+
 	return true;
 }
 
@@ -1316,7 +1321,7 @@ Error EditorExportPlatformIOS::_export_additional_assets(const Ref<EditorExportP
 		if (asset.begins_with("res://")) {
 			Error err = _copy_asset(p_preset, p_out_dir, asset, nullptr, p_is_framework, p_should_embed, r_exported_assets);
 			ERR_FAIL_COND_V(err != OK, err);
-		} else if (ProjectSettings::get_singleton()->localize_path(asset).begins_with("res://")) {
+		} else if (asset.is_absolute_path() && ProjectSettings::get_singleton()->localize_path(asset).begins_with("res://")) {
 			Error err = _copy_asset(p_preset, p_out_dir, ProjectSettings::get_singleton()->localize_path(asset), nullptr, p_is_framework, p_should_embed, r_exported_assets);
 			ERR_FAIL_COND_V(err != OK, err);
 		} else {
@@ -1657,7 +1662,7 @@ Error EditorExportPlatformIOS::_export_project_helper(const Ref<EditorExportPres
 	}
 	String pack_path = binary_dir + ".pck";
 	Vector<SharedObject> libraries;
-	Error err = save_pack(true, p_preset, p_debug, pack_path, &libraries);
+	Error err = save_pack(p_preset, p_debug, pack_path, &libraries);
 	if (err) {
 		// Message is supplied by the subroutine method.
 		return err;
diff --git a/platform/ios/os_ios.h b/platform/ios/os_ios.h
index 10ecd08a89..6950720b38 100644
--- a/platform/ios/os_ios.h
+++ b/platform/ios/os_ios.h
@@ -103,7 +103,7 @@ public:
 	virtual Vector<String> get_system_font_path_for_text(const String &p_font_name, const String &p_text, const String &p_locale = String(), const String &p_script = String(), int p_weight = 400, int p_stretch = 100, bool p_italic = false) const override;
 	virtual String get_system_font_path(const String &p_font_name, int p_weight = 400, int p_stretch = 100, bool p_italic = false) const override;
 
-	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr) override;
+	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr, bool p_generate_temp_files = false) override;
 	virtual Error close_dynamic_library(void *p_library_handle) override;
 	virtual Error get_dynamic_library_symbol_handle(void *p_library_handle, const String &p_name, void *&p_symbol_handle, bool p_optional = false) override;
 
diff --git a/platform/ios/os_ios.mm b/platform/ios/os_ios.mm
index ac13a4d457..c989dc6221 100644
--- a/platform/ios/os_ios.mm
+++ b/platform/ios/os_ios.mm
@@ -219,7 +219,7 @@ _FORCE_INLINE_ String OS_IOS::get_framework_executable(const String &p_path) {
 	return p_path;
 }
 
-Error OS_IOS::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path) {
+Error OS_IOS::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path, bool p_generate_temp_files) {
 	if (p_path.length() == 0) {
 		// Static xcframework.
 		p_library_handle = RTLD_SELF;
diff --git a/platform/linuxbsd/export/export_plugin.cpp b/platform/linuxbsd/export/export_plugin.cpp
index 773b124c6a..936adddda3 100644
--- a/platform/linuxbsd/export/export_plugin.cpp
+++ b/platform/linuxbsd/export/export_plugin.cpp
@@ -146,12 +146,19 @@ List<String> EditorExportPlatformLinuxBSD::get_binary_extensions(const Ref<Edito
 }
 
 bool EditorExportPlatformLinuxBSD::get_export_option_visibility(const EditorExportPreset *p_preset, const String &p_option) const {
-	if (p_preset) {
-		// Hide SSH options.
-		bool ssh = p_preset->get("ssh_remote_deploy/enabled");
-		if (!ssh && p_option != "ssh_remote_deploy/enabled" && p_option.begins_with("ssh_remote_deploy/")) {
-			return false;
-		}
+	if (p_preset == nullptr) {
+		return true;
+	}
+
+	bool advanced_options_enabled = p_preset->are_advanced_options_enabled();
+
+	// Hide SSH options.
+	bool ssh = p_preset->get("ssh_remote_deploy/enabled");
+	if (!ssh && p_option != "ssh_remote_deploy/enabled" && p_option.begins_with("ssh_remote_deploy/")) {
+		return false;
+	}
+	if (p_option == "dotnet/embed_build_outputs") {
+		return advanced_options_enabled;
 	}
 	return true;
 }
diff --git a/platform/linuxbsd/os_linuxbsd.cpp b/platform/linuxbsd/os_linuxbsd.cpp
index f29275c910..68b4cd7f5a 100644
--- a/platform/linuxbsd/os_linuxbsd.cpp
+++ b/platform/linuxbsd/os_linuxbsd.cpp
@@ -62,6 +62,10 @@
 #include <mntent.h>
 #endif
 
+#if defined(__FreeBSD__)
+#include <sys/sysctl.h>
+#endif
+
 void OS_LinuxBSD::alert(const String &p_alert, const String &p_title) {
 	const char *message_programs[] = { "zenity", "kdialog", "Xdialog", "xmessage" };
 
@@ -145,17 +149,36 @@ void OS_LinuxBSD::initialize_joypads() {
 String OS_LinuxBSD::get_unique_id() const {
 	static String machine_id;
 	if (machine_id.is_empty()) {
+#if defined(__FreeBSD__)
+		const int mib[2] = { CTL_KERN, KERN_HOSTUUID };
+		char buf[4096];
+		memset(buf, 0, sizeof(buf));
+		size_t len = sizeof(buf) - 1;
+		if (sysctl(mib, 2, buf, &len, 0x0, 0) != -1) {
+			machine_id = String::utf8(buf).replace("-", "");
+		}
+#else
 		Ref<FileAccess> f = FileAccess::open("/etc/machine-id", FileAccess::READ);
 		if (f.is_valid()) {
 			while (machine_id.is_empty() && !f->eof_reached()) {
 				machine_id = f->get_line().strip_edges();
 			}
 		}
+#endif
 	}
 	return machine_id;
 }
 
 String OS_LinuxBSD::get_processor_name() const {
+#if defined(__FreeBSD__)
+	const int mib[2] = { CTL_HW, HW_MODEL };
+	char buf[4096];
+	memset(buf, 0, sizeof(buf));
+	size_t len = sizeof(buf) - 1;
+	if (sysctl(mib, 2, buf, &len, 0x0, 0) != -1) {
+		return String::utf8(buf);
+	}
+#else
 	Ref<FileAccess> f = FileAccess::open("/proc/cpuinfo", FileAccess::READ);
 	ERR_FAIL_COND_V_MSG(f.is_null(), "", String("Couldn't open `/proc/cpuinfo` to get the CPU model name. Returning an empty string."));
 
@@ -165,8 +188,9 @@ String OS_LinuxBSD::get_processor_name() const {
 			return line.split(":")[1].strip_edges();
 		}
 	}
+#endif
 
-	ERR_FAIL_V_MSG("", String("Couldn't get the CPU model name from `/proc/cpuinfo`. Returning an empty string."));
+	ERR_FAIL_V_MSG("", String("Couldn't get the CPU model. Returning an empty string."));
 }
 
 bool OS_LinuxBSD::is_sandboxed() const {
diff --git a/platform/macos/doc_classes/EditorExportPlatformMacOS.xml b/platform/macos/doc_classes/EditorExportPlatformMacOS.xml
index 506b0dffb8..7355042a48 100644
--- a/platform/macos/doc_classes/EditorExportPlatformMacOS.xml
+++ b/platform/macos/doc_classes/EditorExportPlatformMacOS.xml
@@ -12,7 +12,7 @@
 	<members>
 		<member name="application/additional_plist_content" type="String" setter="" getter="">
 			Additional data added to the root [code]&lt;dict&gt;[/code] section of the [url=https://developer.apple.com/documentation/bundleresources/information_property_list]Info.plist[/url] file. The value should be an XML section with pairs of key-value elements, e.g.:
-			[codeblock]
+			[codeblock lang=text]
 			&lt;key&gt;key_name&lt;/key&gt;
 			&lt;string&gt;value&lt;/string&gt;
 			[/codeblock]
diff --git a/platform/macos/export/export_plugin.cpp b/platform/macos/export/export_plugin.cpp
index 05ae4a74c9..d75def9b50 100644
--- a/platform/macos/export/export_plugin.cpp
+++ b/platform/macos/export/export_plugin.cpp
@@ -333,6 +333,12 @@ bool EditorExportPlatformMacOS::get_export_option_visibility(const EditorExportP
 			return false;
 		}
 	}
+
+	// Hide unsupported .NET embedding option.
+	if (p_option == "dotnet/embed_build_outputs") {
+		return false;
+	}
+
 	return true;
 }
 
@@ -1768,7 +1774,7 @@ Error EditorExportPlatformMacOS::export_project(const Ref<EditorExportPreset> &p
 
 		String pack_path = tmp_app_path_name + "/Contents/Resources/" + pkg_name + ".pck";
 		Vector<SharedObject> shared_objects;
-		err = save_pack(true, p_preset, p_debug, pack_path, &shared_objects);
+		err = save_pack(p_preset, p_debug, pack_path, &shared_objects);
 
 		bool lib_validation = p_preset->get("codesign/entitlements/disable_library_validation");
 		if (!shared_objects.is_empty() && sign_enabled && ad_hoc && !lib_validation) {
diff --git a/platform/macos/os_macos.h b/platform/macos/os_macos.h
index 8088c3431c..439e7d2fff 100644
--- a/platform/macos/os_macos.h
+++ b/platform/macos/os_macos.h
@@ -85,7 +85,7 @@ public:
 
 	virtual void alert(const String &p_alert, const String &p_title = "ALERT!") override;
 
-	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr) override;
+	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr, bool p_generate_temp_files = false) override;
 
 	virtual MainLoop *get_main_loop() const override;
 
diff --git a/platform/macos/os_macos.mm b/platform/macos/os_macos.mm
index d9ad8f937a..261777a4e8 100644
--- a/platform/macos/os_macos.mm
+++ b/platform/macos/os_macos.mm
@@ -217,7 +217,7 @@ _FORCE_INLINE_ String OS_MacOS::get_framework_executable(const String &p_path) {
 	return p_path;
 }
 
-Error OS_MacOS::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path) {
+Error OS_MacOS::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path, bool p_generate_temp_files) {
 	String path = get_framework_executable(p_path);
 
 	if (!FileAccess::exists(path)) {
diff --git a/platform/web/export/export_plugin.cpp b/platform/web/export/export_plugin.cpp
index ec7f599507..41c969b5f4 100644
--- a/platform/web/export/export_plugin.cpp
+++ b/platform/web/export/export_plugin.cpp
@@ -479,7 +479,7 @@ Error EditorExportPlatformWeb::export_project(const Ref<EditorExportPreset> &p_p
 	// Export pck and shared objects
 	Vector<SharedObject> shared_objects;
 	String pck_path = base_path + ".pck";
-	Error error = save_pack(true, p_preset, p_debug, pck_path, &shared_objects);
+	Error error = save_pack(p_preset, p_debug, pck_path, &shared_objects);
 	if (error != OK) {
 		add_message(EXPORT_MESSAGE_ERROR, TTR("Export"), vformat(TTR("Could not write file: \"%s\"."), pck_path));
 		return error;
diff --git a/platform/web/os_web.cpp b/platform/web/os_web.cpp
index 4b93ce9e75..8fc89a79d9 100644
--- a/platform/web/os_web.cpp
+++ b/platform/web/os_web.cpp
@@ -243,7 +243,7 @@ bool OS_Web::is_userfs_persistent() const {
 	return idb_available;
 }
 
-Error OS_Web::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path) {
+Error OS_Web::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path, bool p_generate_temp_files) {
 	String path = p_path.get_file();
 	p_library_handle = dlopen(path.utf8().get_data(), RTLD_NOW);
 	ERR_FAIL_NULL_V_MSG(p_library_handle, ERR_CANT_OPEN, vformat("Can't open dynamic library: %s. Error: %s.", p_path, dlerror()));
diff --git a/platform/web/os_web.h b/platform/web/os_web.h
index 16f8ea8550..537b05048a 100644
--- a/platform/web/os_web.h
+++ b/platform/web/os_web.h
@@ -108,7 +108,7 @@ public:
 
 	void alert(const String &p_alert, const String &p_title = "ALERT!") override;
 
-	Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr) override;
+	Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr, bool p_generate_temp_files = false) override;
 
 	void resume_audio();
 
diff --git a/platform/windows/SCsub b/platform/windows/SCsub
index cf6416b8da..159a273e70 100644
--- a/platform/windows/SCsub
+++ b/platform/windows/SCsub
@@ -17,6 +17,7 @@ common_win = [
     "joypad_windows.cpp",
     "tts_windows.cpp",
     "windows_terminal_logger.cpp",
+    "windows_utils.cpp",
     "native_menu_windows.cpp",
     "gl_manager_windows_native.cpp",
     "gl_manager_windows_angle.cpp",
diff --git a/platform/windows/display_server_windows.cpp b/platform/windows/display_server_windows.cpp
index 2e007b5efc..74665664b1 100644
--- a/platform/windows/display_server_windows.cpp
+++ b/platform/windows/display_server_windows.cpp
@@ -1895,7 +1895,7 @@ Size2i DisplayServerWindows::window_get_size_with_decorations(WindowID p_window)
 	return Size2();
 }
 
-void DisplayServerWindows::_get_window_style(bool p_main_window, bool p_fullscreen, bool p_multiwindow_fs, bool p_borderless, bool p_resizable, bool p_maximized, bool p_no_activate_focus, DWORD &r_style, DWORD &r_style_ex) {
+void DisplayServerWindows::_get_window_style(bool p_main_window, bool p_fullscreen, bool p_multiwindow_fs, bool p_borderless, bool p_resizable, bool p_maximized, bool p_maximized_fs, bool p_no_activate_focus, DWORD &r_style, DWORD &r_style_ex) {
 	// Windows docs for window styles:
 	// https://docs.microsoft.com/en-us/windows/win32/winmsg/window-styles
 	// https://docs.microsoft.com/en-us/windows/win32/winmsg/extended-window-styles
@@ -1909,7 +1909,17 @@ void DisplayServerWindows::_get_window_style(bool p_main_window, bool p_fullscre
 
 	if (p_fullscreen || p_borderless) {
 		r_style |= WS_POPUP; // p_borderless was WS_EX_TOOLWINDOW in the past.
-		if ((p_fullscreen && p_multiwindow_fs) || p_maximized) {
+		if (p_maximized) {
+			r_style |= WS_MAXIMIZE;
+		}
+		if (!p_fullscreen) {
+			r_style |= WS_SYSMENU | WS_MINIMIZEBOX;
+
+			if (p_resizable) {
+				r_style |= WS_MAXIMIZEBOX;
+			}
+		}
+		if ((p_fullscreen && p_multiwindow_fs) || p_maximized_fs) {
 			r_style |= WS_BORDER; // Allows child windows to be displayed on top of full screen.
 		}
 	} else {
@@ -1945,7 +1955,7 @@ void DisplayServerWindows::_update_window_style(WindowID p_window, bool p_repain
 	DWORD style = 0;
 	DWORD style_ex = 0;
 
-	_get_window_style(p_window == MAIN_WINDOW_ID, wd.fullscreen, wd.multiwindow_fs, wd.borderless, wd.resizable, wd.maximized, wd.no_focus || wd.is_popup, style, style_ex);
+	_get_window_style(p_window == MAIN_WINDOW_ID, wd.fullscreen, wd.multiwindow_fs, wd.borderless, wd.resizable, wd.maximized, wd.maximized_fs, wd.no_focus || wd.is_popup, style, style_ex);
 
 	SetWindowLongPtr(wd.hWnd, GWL_STYLE, style);
 	SetWindowLongPtr(wd.hWnd, GWL_EXSTYLE, style_ex);
@@ -1988,6 +1998,7 @@ void DisplayServerWindows::window_set_mode(WindowMode p_mode, WindowID p_window)
 			wd.pre_fs_valid = true;
 		}
 
+		ShowWindow(wd.hWnd, SW_RESTORE);
 		MoveWindow(wd.hWnd, rect.left, rect.top, rect.right - rect.left, rect.bottom - rect.top, TRUE);
 
 		if (restore_mouse_trails > 1) {
@@ -2023,7 +2034,7 @@ void DisplayServerWindows::window_set_mode(WindowMode p_mode, WindowID p_window)
 	}
 
 	if ((p_mode == WINDOW_MODE_FULLSCREEN || p_mode == WINDOW_MODE_EXCLUSIVE_FULLSCREEN) && !wd.fullscreen) {
-		if (wd.minimized) {
+		if (wd.minimized || wd.maximized) {
 			ShowWindow(wd.hWnd, SW_RESTORE);
 		}
 		wd.was_maximized = wd.maximized;
@@ -3737,6 +3748,15 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA
 					min_max_info->ptMaxTrackSize.x = windows[window_id].max_size.x + decor.x;
 					min_max_info->ptMaxTrackSize.y = windows[window_id].max_size.y + decor.y;
 				}
+				if (windows[window_id].borderless) {
+					Rect2i screen_rect = screen_get_usable_rect(window_get_current_screen(window_id));
+
+					// Set the size of (borderless) maximized mode to exclude taskbar (or any other panel) if present.
+					min_max_info->ptMaxPosition.x = screen_rect.position.x;
+					min_max_info->ptMaxPosition.y = screen_rect.position.y;
+					min_max_info->ptMaxSize.x = screen_rect.size.x;
+					min_max_info->ptMaxSize.y = screen_rect.size.y;
+				}
 				return 0;
 			}
 		} break;
@@ -3788,9 +3808,15 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA
 				case SC_MONITORPOWER: // Monitor trying to enter powersave?
 					return 0; // Prevent from happening.
 				case SC_KEYMENU:
-					if ((lParam >> 16) <= 0) {
+					Engine *engine = Engine::get_singleton();
+					if (((lParam >> 16) <= 0) && !engine->is_project_manager_hint() && !engine->is_editor_hint() && !GLOBAL_GET("application/run/enable_alt_space_menu")) {
+						return 0;
+					}
+					if (!alt_mem || !(GetAsyncKeyState(VK_SPACE) & (1 << 15))) {
 						return 0;
 					}
+					SendMessage(windows[window_id].hWnd, WM_SYSKEYUP, VK_SPACE, 0);
+					SendMessage(windows[window_id].hWnd, WM_SYSKEYUP, VK_MENU, 0);
 			}
 		} break;
 		case WM_INDICATOR_CALLBACK_MESSAGE: {
@@ -3930,7 +3956,7 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA
 				}
 				mm->set_relative_screen_position(mm->get_relative());
 
-				if ((windows[window_id].window_has_focus || windows[window_id].is_popup) && mm->get_relative() != Vector2()) {
+				if ((windows[window_id].window_focused || windows[window_id].is_popup) && mm->get_relative() != Vector2()) {
 					Input::get_singleton()->parse_input_event(mm);
 				}
 			}
@@ -3978,7 +4004,7 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA
 					windows[window_id].last_pen_inverted = inverted;
 
 					// Don't calculate relative mouse movement if we don't have focus in CAPTURED mode.
-					if (!windows[window_id].window_has_focus && mouse_mode == MOUSE_MODE_CAPTURED) {
+					if (!windows[window_id].window_focused && mouse_mode == MOUSE_MODE_CAPTURED) {
 						break;
 					}
 
@@ -4028,7 +4054,7 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA
 					mm->set_relative_screen_position(mm->get_relative());
 					old_x = mm->get_position().x;
 					old_y = mm->get_position().y;
-					if (windows[window_id].window_has_focus || window_get_active_popup() == window_id) {
+					if (windows[window_id].window_focused || window_get_active_popup() == window_id) {
 						Input::get_singleton()->parse_input_event(mm);
 					}
 				}
@@ -4114,7 +4140,7 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA
 			}
 
 			// Don't calculate relative mouse movement if we don't have focus in CAPTURED mode.
-			if (!windows[window_id].window_has_focus && mouse_mode == MOUSE_MODE_CAPTURED) {
+			if (!windows[window_id].window_focused && mouse_mode == MOUSE_MODE_CAPTURED) {
 				break;
 			}
 
@@ -4177,7 +4203,7 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA
 			mm->set_relative_screen_position(mm->get_relative());
 			old_x = mm->get_position().x;
 			old_y = mm->get_position().y;
-			if (windows[window_id].window_has_focus || window_get_active_popup() == window_id) {
+			if (windows[window_id].window_focused || window_get_active_popup() == window_id) {
 				Input::get_singleton()->parse_input_event(mm);
 			}
 
@@ -4229,7 +4255,7 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA
 			}
 
 			// Don't calculate relative mouse movement if we don't have focus in CAPTURED mode.
-			if (!windows[window_id].window_has_focus && mouse_mode == MOUSE_MODE_CAPTURED) {
+			if (!windows[window_id].window_focused && mouse_mode == MOUSE_MODE_CAPTURED) {
 				break;
 			}
 
@@ -4521,10 +4547,23 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA
 					window.minimized = true;
 				} else if (IsZoomed(hWnd)) {
 					window.maximized = true;
+
+					// If maximized_window_size == screen_size add 1px border to prevent switching to exclusive_fs.
+					if (!window.maximized_fs && window.borderless && window_rect.position == screen_position && window_rect.size == screen_size) {
+						// Window (borderless) was just maximized and the covers the entire screen.
+						window.maximized_fs = true;
+						_update_window_style(window_id, false);
+					}
 				} else if (window_rect.position == screen_position && window_rect.size == screen_size) {
 					window.fullscreen = true;
 				}
 
+				if (window.maximized_fs && !window.maximized) {
+					// Window (maximized and covering fullscreen) was just non-maximized.
+					window.maximized_fs = false;
+					_update_window_style(window_id, false);
+				}
+
 				if (!window.minimized) {
 					window.width = window_client_rect.size.width;
 					window.height = window_client_rect.size.height;
@@ -4725,7 +4764,7 @@ LRESULT DisplayServerWindows::WndProc(HWND hWnd, UINT uMsg, WPARAM wParam, LPARA
 		} break;
 		case WM_SETCURSOR: {
 			if (LOWORD(lParam) == HTCLIENT) {
-				if (windows[window_id].window_has_focus && (mouse_mode == MOUSE_MODE_HIDDEN || mouse_mode == MOUSE_MODE_CAPTURED || mouse_mode == MOUSE_MODE_CONFINED_HIDDEN)) {
+				if (windows[window_id].window_focused && (mouse_mode == MOUSE_MODE_HIDDEN || mouse_mode == MOUSE_MODE_CAPTURED || mouse_mode == MOUSE_MODE_CONFINED_HIDDEN)) {
 					// Hide the cursor.
 					if (hCursor == nullptr) {
 						hCursor = SetCursor(nullptr);
@@ -5004,7 +5043,7 @@ DisplayServer::WindowID DisplayServerWindows::_create_window(WindowMode p_mode,
 	DWORD dwExStyle;
 	DWORD dwStyle;
 
-	_get_window_style(window_id_counter == MAIN_WINDOW_ID, (p_mode == WINDOW_MODE_FULLSCREEN || p_mode == WINDOW_MODE_EXCLUSIVE_FULLSCREEN), p_mode != WINDOW_MODE_EXCLUSIVE_FULLSCREEN, p_flags & WINDOW_FLAG_BORDERLESS_BIT, !(p_flags & WINDOW_FLAG_RESIZE_DISABLED_BIT), p_mode == WINDOW_MODE_MAXIMIZED, (p_flags & WINDOW_FLAG_NO_FOCUS_BIT) | (p_flags & WINDOW_FLAG_POPUP), dwStyle, dwExStyle);
+	_get_window_style(window_id_counter == MAIN_WINDOW_ID, (p_mode == WINDOW_MODE_FULLSCREEN || p_mode == WINDOW_MODE_EXCLUSIVE_FULLSCREEN), p_mode != WINDOW_MODE_EXCLUSIVE_FULLSCREEN, p_flags & WINDOW_FLAG_BORDERLESS_BIT, !(p_flags & WINDOW_FLAG_RESIZE_DISABLED_BIT), p_mode == WINDOW_MODE_MAXIMIZED, false, (p_flags & WINDOW_FLAG_NO_FOCUS_BIT) | (p_flags & WINDOW_FLAG_POPUP), dwStyle, dwExStyle);
 
 	RECT WindowRect;
 
@@ -5237,6 +5276,12 @@ DisplayServer::WindowID DisplayServerWindows::_create_window(WindowMode p_mode,
 			wd.height = p_rect.size.height;
 		}
 
+		// Set size of maximized borderless window (by default it covers the entire screen).
+		if (p_mode == WINDOW_MODE_MAXIMIZED && (p_flags & WINDOW_FLAG_BORDERLESS_BIT)) {
+			Rect2i srect = screen_get_usable_rect(rq_screen);
+			SetWindowPos(wd.hWnd, HWND_TOP, srect.position.x, srect.position.y, srect.size.width, srect.size.height, SWP_NOZORDER | SWP_NOACTIVATE);
+		}
+
 		window_id_counter++;
 	}
 
diff --git a/platform/windows/display_server_windows.h b/platform/windows/display_server_windows.h
index cb209948b1..4792b65801 100644
--- a/platform/windows/display_server_windows.h
+++ b/platform/windows/display_server_windows.h
@@ -382,6 +382,7 @@ class DisplayServerWindows : public DisplayServer {
 		bool pre_fs_valid = false;
 		RECT pre_fs_rect;
 		bool maximized = false;
+		bool maximized_fs = false;
 		bool minimized = false;
 		bool fullscreen = false;
 		bool multiwindow_fs = false;
@@ -391,7 +392,6 @@ class DisplayServerWindows : public DisplayServer {
 		bool was_maximized = false;
 		bool always_on_top = false;
 		bool no_focus = false;
-		bool window_has_focus = false;
 		bool exclusive = false;
 		bool context_created = false;
 		bool mpass = false;
@@ -471,7 +471,7 @@ class DisplayServerWindows : public DisplayServer {
 	HashMap<IndicatorID, IndicatorData> indicators;
 
 	void _send_window_event(const WindowData &wd, WindowEvent p_event);
-	void _get_window_style(bool p_main_window, bool p_fullscreen, bool p_multiwindow_fs, bool p_borderless, bool p_resizable, bool p_maximized, bool p_no_activate_focus, DWORD &r_style, DWORD &r_style_ex);
+	void _get_window_style(bool p_main_window, bool p_fullscreen, bool p_multiwindow_fs, bool p_borderless, bool p_resizable, bool p_maximized, bool p_maximized_fs, bool p_no_activate_focus, DWORD &r_style, DWORD &r_style_ex);
 
 	MouseMode mouse_mode;
 	int restore_mouse_trails = 0;
diff --git a/platform/windows/doc_classes/EditorExportPlatformWindows.xml b/platform/windows/doc_classes/EditorExportPlatformWindows.xml
index 1239a2b32f..06b272c10e 100644
--- a/platform/windows/doc_classes/EditorExportPlatformWindows.xml
+++ b/platform/windows/doc_classes/EditorExportPlatformWindows.xml
@@ -4,6 +4,7 @@
 		Exporter for Windows.
 	</brief_description>
 	<description>
+		The Windows exporter customizes how a Windows build is handled. In the editor's "Export" window, it is created when adding a new "Windows" preset.
 	</description>
 	<tutorials>
 		<link title="Exporting for Windows">$DOCS_URL/tutorials/export/exporting_for_windows.html</link>
diff --git a/platform/windows/export/export_plugin.cpp b/platform/windows/export/export_plugin.cpp
index fe2a930cc8..a3c86611a4 100644
--- a/platform/windows/export/export_plugin.cpp
+++ b/platform/windows/export/export_plugin.cpp
@@ -367,11 +367,17 @@ String EditorExportPlatformWindows::get_export_option_warning(const EditorExport
 }
 
 bool EditorExportPlatformWindows::get_export_option_visibility(const EditorExportPreset *p_preset, const String &p_option) const {
+	if (p_preset == nullptr) {
+		return true;
+	}
+
 	// This option is not supported by "osslsigncode", used on non-Windows host.
 	if (!OS::get_singleton()->has_feature("windows") && p_option == "codesign/identity_type") {
 		return false;
 	}
 
+	bool advanced_options_enabled = p_preset->are_advanced_options_enabled();
+
 	// Hide codesign.
 	bool codesign = p_preset->get("codesign/enable");
 	if (!codesign && p_option != "codesign/enable" && p_option.begins_with("codesign/")) {
@@ -390,6 +396,9 @@ bool EditorExportPlatformWindows::get_export_option_visibility(const EditorExpor
 		return false;
 	}
 
+	if (p_option == "dotnet/embed_build_outputs") {
+		return advanced_options_enabled;
+	}
 	return true;
 }
 
diff --git a/platform/windows/os_windows.cpp b/platform/windows/os_windows.cpp
index c39b327953..e0e35c6e0f 100644
--- a/platform/windows/os_windows.cpp
+++ b/platform/windows/os_windows.cpp
@@ -34,6 +34,7 @@
 #include "joypad_windows.h"
 #include "lang_table.h"
 #include "windows_terminal_logger.h"
+#include "windows_utils.h"
 
 #include "core/debugger/engine_debugger.h"
 #include "core/debugger/script_debugger.h"
@@ -269,6 +270,10 @@ void OS_Windows::finalize() {
 }
 
 void OS_Windows::finalize_core() {
+	while (!temp_libraries.is_empty()) {
+		_remove_temp_library(temp_libraries.last()->key);
+	}
+
 	FileAccessWindows::finalize();
 
 	timeEndPeriod(1);
@@ -354,7 +359,7 @@ void debug_dynamic_library_check_dependencies(const String &p_root_path, const S
 }
 #endif
 
-Error OS_Windows::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path) {
+Error OS_Windows::open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path, String *r_resolved_path, bool p_generate_temp_files) {
 	String path = p_path.replace("/", "\\");
 
 	if (!FileAccess::exists(path)) {
@@ -364,6 +369,35 @@ Error OS_Windows::open_dynamic_library(const String &p_path, void *&p_library_ha
 
 	ERR_FAIL_COND_V(!FileAccess::exists(path), ERR_FILE_NOT_FOUND);
 
+	// Here we want a copy to be loaded.
+	// This is so the original file isn't locked and can be updated by a compiler.
+	if (p_generate_temp_files) {
+		// Copy the file to the same directory as the original with a prefix in the name.
+		// This is so relative path to dependencies are satisfied.
+		String copy_path = path.get_base_dir().path_join("~" + path.get_file());
+
+		// If there's a left-over copy (possibly from a crash) then delete it first.
+		if (FileAccess::exists(copy_path)) {
+			DirAccess::remove_absolute(copy_path);
+		}
+
+		Error copy_err = DirAccess::copy_absolute(path, copy_path);
+		if (copy_err) {
+			ERR_PRINT("Error copying library: " + path);
+			return ERR_CANT_CREATE;
+		}
+
+		FileAccess::set_hidden_attribute(copy_path, true);
+
+		// Save the copied path so it can be deleted later.
+		path = copy_path;
+
+		Error pdb_err = WindowsUtils::copy_and_rename_pdb(path);
+		if (pdb_err != OK && pdb_err != ERR_SKIP) {
+			WARN_PRINT(vformat("Failed to rename the PDB file. The original PDB file for '%s' will be loaded.", path));
+		}
+	}
+
 	typedef DLL_DIRECTORY_COOKIE(WINAPI * PAddDllDirectory)(PCWSTR);
 	typedef BOOL(WINAPI * PRemoveDllDirectory)(DLL_DIRECTORY_COOKIE);
 
@@ -378,8 +412,12 @@ Error OS_Windows::open_dynamic_library(const String &p_path, void *&p_library_ha
 	}
 
 	p_library_handle = (void *)LoadLibraryExW((LPCWSTR)(path.utf16().get_data()), nullptr, (p_also_set_library_path && has_dll_directory_api) ? LOAD_LIBRARY_SEARCH_DEFAULT_DIRS : 0);
-#ifdef DEBUG_ENABLED
 	if (!p_library_handle) {
+		if (p_generate_temp_files) {
+			DirAccess::remove_absolute(path);
+		}
+
+#ifdef DEBUG_ENABLED
 		DWORD err_code = GetLastError();
 
 		HashSet<String> checekd_libs;
@@ -397,8 +435,10 @@ Error OS_Windows::open_dynamic_library(const String &p_path, void *&p_library_ha
 		} else {
 			ERR_FAIL_V_MSG(ERR_CANT_OPEN, vformat("Can't open dynamic library: %s. Error: %s.", p_path, format_error_message(err_code)));
 		}
+#endif
 	}
-#else
+
+#ifndef DEBUG_ENABLED
 	ERR_FAIL_NULL_V_MSG(p_library_handle, ERR_CANT_OPEN, vformat("Can't open dynamic library: %s. Error: %s.", p_path, format_error_message(GetLastError())));
 #endif
 
@@ -410,6 +450,10 @@ Error OS_Windows::open_dynamic_library(const String &p_path, void *&p_library_ha
 		*r_resolved_path = path;
 	}
 
+	if (p_generate_temp_files) {
+		temp_libraries[p_library_handle] = path;
+	}
+
 	return OK;
 }
 
@@ -417,9 +461,22 @@ Error OS_Windows::close_dynamic_library(void *p_library_handle) {
 	if (!FreeLibrary((HMODULE)p_library_handle)) {
 		return FAILED;
 	}
+
+	// Delete temporary copy of library if it exists.
+	_remove_temp_library(p_library_handle);
+
 	return OK;
 }
 
+void OS_Windows::_remove_temp_library(void *p_library_handle) {
+	if (temp_libraries.has(p_library_handle)) {
+		String path = temp_libraries[p_library_handle];
+		DirAccess::remove_absolute(path);
+		WindowsUtils::remove_temp_pdbs(path);
+		temp_libraries.erase(p_library_handle);
+	}
+}
+
 Error OS_Windows::get_dynamic_library_symbol_handle(void *p_library_handle, const String &p_name, void *&p_symbol_handle, bool p_optional) {
 	p_symbol_handle = (void *)GetProcAddress((HMODULE)p_library_handle, p_name.utf8().get_data());
 	if (!p_symbol_handle) {
diff --git a/platform/windows/os_windows.h b/platform/windows/os_windows.h
index c703a3ddca..c506af13b2 100644
--- a/platform/windows/os_windows.h
+++ b/platform/windows/os_windows.h
@@ -127,6 +127,9 @@ class OS_Windows : public OS {
 	bool dwrite_init = false;
 	bool dwrite2_init = false;
 
+	HashMap<void *, String> temp_libraries;
+
+	void _remove_temp_library(void *p_library_handle);
 	String _get_default_fontname(const String &p_font_name) const;
 	DWRITE_FONT_WEIGHT _weight_to_dw(int p_weight) const;
 	DWRITE_FONT_STRETCH _stretch_to_dw(int p_stretch) const;
@@ -155,7 +158,7 @@ public:
 
 	virtual Error get_entropy(uint8_t *r_buffer, int p_bytes) override;
 
-	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr) override;
+	virtual Error open_dynamic_library(const String &p_path, void *&p_library_handle, bool p_also_set_library_path = false, String *r_resolved_path = nullptr, bool p_generate_temp_files = false) override;
 	virtual Error close_dynamic_library(void *p_library_handle) override;
 	virtual Error get_dynamic_library_symbol_handle(void *p_library_handle, const String &p_name, void *&p_symbol_handle, bool p_optional = false) override;
 
diff --git a/platform/windows/windows_utils.cpp b/platform/windows/windows_utils.cpp
new file mode 100644
index 0000000000..147b8bcbae
--- /dev/null
+++ b/platform/windows/windows_utils.cpp
@@ -0,0 +1,280 @@
+/**************************************************************************/
+/*  windows_utils.cpp                                                     */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "windows_utils.h"
+
+#ifdef WINDOWS_ENABLED
+
+#include "core/error/error_macros.h"
+#include "core/io/dir_access.h"
+#include "core/io/file_access.h"
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef FAILED // Overrides Error::FAILED
+
+// dbghelp is linked only in DEBUG_ENABLED builds.
+#ifdef DEBUG_ENABLED
+#include <dbghelp.h>
+#endif
+#include <winnt.h>
+
+HashMap<String, Vector<String>> WindowsUtils::temp_pdbs;
+
+Error WindowsUtils::copy_and_rename_pdb(const String &p_dll_path) {
+#ifdef DEBUG_ENABLED
+	// 1000 ought to be enough for anybody, in case the debugger does not unblock previous PDBs.
+	// Usually no more than 2 will be used.
+	const int max_pdb_names = 1000;
+
+	struct PDBResourceInfo {
+		uint32_t address = 0;
+		String path;
+	} pdb_info;
+
+	// Open and read the PDB information if available.
+	{
+		ULONG dbg_info_size = 0;
+		DWORD dbg_info_position = 0;
+
+		{
+			// The custom LoadLibraryExW is used instead of open_dynamic_library
+			// to avoid loading the original PDB into the debugger.
+			HMODULE library_ptr = LoadLibraryExW((LPCWSTR)(p_dll_path.utf16().get_data()), NULL, LOAD_LIBRARY_AS_DATAFILE_EXCLUSIVE);
+
+			ERR_FAIL_NULL_V_MSG(library_ptr, ERR_FILE_CANT_OPEN, vformat("Failed to load library '%s'.", p_dll_path));
+
+			IMAGE_DEBUG_DIRECTORY *dbg_dir = (IMAGE_DEBUG_DIRECTORY *)ImageDirectoryEntryToDataEx(library_ptr, FALSE, IMAGE_DIRECTORY_ENTRY_DEBUG, &dbg_info_size, NULL);
+
+			bool has_debug = dbg_dir && dbg_dir->Type == IMAGE_DEBUG_TYPE_CODEVIEW;
+			if (has_debug) {
+				dbg_info_position = dbg_dir->PointerToRawData;
+				dbg_info_size = dbg_dir->SizeOfData;
+			}
+
+			ERR_FAIL_COND_V_MSG(!FreeLibrary((HMODULE)library_ptr), FAILED, vformat("Failed to free library '%s'.", p_dll_path));
+
+			if (!has_debug) {
+				// Skip with no debugging symbols.
+				return ERR_SKIP;
+			}
+		}
+
+		struct CV_HEADER {
+			DWORD Signature;
+			DWORD Offset;
+		};
+
+		const DWORD nb10_magic = '01BN';
+		struct CV_INFO_PDB20 {
+			CV_HEADER CvHeader; // CvHeader.Signature = "NB10"
+			DWORD Signature;
+			DWORD Age;
+			BYTE PdbFileName[1];
+		};
+
+		const DWORD rsds_magic = 'SDSR';
+		struct CV_INFO_PDB70 {
+			DWORD Signature; // "RSDS"
+			BYTE Guid[16];
+			DWORD Age;
+			BYTE PdbFileName[1];
+		};
+
+		Vector<uint8_t> dll_data;
+
+		{
+			Error err = OK;
+			Ref<FileAccess> file = FileAccess::open(p_dll_path, FileAccess::READ, &err);
+			ERR_FAIL_COND_V_MSG(err != OK, err, vformat("Failed to read library '%s'.", p_dll_path));
+
+			file->seek(dbg_info_position);
+			dll_data = file->get_buffer(dbg_info_size);
+			ERR_FAIL_COND_V_MSG(file->get_error() != OK, file->get_error(), vformat("Failed to read data from library '%s'.", p_dll_path));
+		}
+
+		const char *raw_pdb_path = nullptr;
+		int raw_pdb_offset = 0;
+		DWORD *pdb_info_signature = (DWORD *)dll_data.ptr();
+
+		if (*pdb_info_signature == rsds_magic) {
+			raw_pdb_path = (const char *)(((CV_INFO_PDB70 *)pdb_info_signature)->PdbFileName);
+			raw_pdb_offset = offsetof(CV_INFO_PDB70, PdbFileName);
+		} else if (*pdb_info_signature == nb10_magic) {
+			// Not even sure if this format still exists anywhere...
+			raw_pdb_path = (const char *)(((CV_INFO_PDB20 *)pdb_info_signature)->PdbFileName);
+			raw_pdb_offset = offsetof(CV_INFO_PDB20, PdbFileName);
+		} else {
+			ERR_FAIL_V_MSG(FAILED, vformat("Unknown PDB format in '%s'.", p_dll_path));
+		}
+
+		String utf_path;
+		Error err = utf_path.parse_utf8(raw_pdb_path);
+		ERR_FAIL_COND_V_MSG(err != OK, err, vformat("Failed to read PDB path from '%s'.", p_dll_path));
+
+		pdb_info.path = utf_path;
+		pdb_info.address = dbg_info_position + raw_pdb_offset;
+	}
+
+	String dll_base_dir = p_dll_path.get_base_dir();
+	String copy_pdb_path = pdb_info.path;
+
+	// Attempting to find the PDB by absolute and relative paths.
+	if (copy_pdb_path.is_relative_path()) {
+		copy_pdb_path = dll_base_dir.path_join(copy_pdb_path);
+		if (!FileAccess::exists(copy_pdb_path)) {
+			copy_pdb_path = dll_base_dir.path_join(copy_pdb_path.get_file());
+		}
+	} else if (!FileAccess::exists(copy_pdb_path)) {
+		copy_pdb_path = dll_base_dir.path_join(copy_pdb_path.get_file());
+	}
+	ERR_FAIL_COND_V_MSG(!FileAccess::exists(copy_pdb_path), FAILED, vformat("File '%s' does not exist.", copy_pdb_path));
+
+	String new_pdb_base_name = p_dll_path.get_file().get_basename() + "_";
+
+	// Checking the available space for the updated string
+	// and trying to shorten it if there is not much space.
+	{
+		// e.g. 999.pdb
+		const uint8_t suffix_size = String::num_characters((int64_t)max_pdb_names - 1) + 4;
+		// e.g. ~lib_ + 1 for the \0
+		const uint8_t min_base_size = 5 + 1;
+		int original_path_size = pdb_info.path.utf8().length();
+		CharString utf8_name = new_pdb_base_name.utf8();
+		int new_expected_buffer_size = utf8_name.length() + suffix_size;
+
+		// Since we have limited space inside the DLL to patch the path to the PDB,
+		// it is necessary to limit the size based on the number of bytes occupied by the string.
+		if (new_expected_buffer_size > original_path_size) {
+			ERR_FAIL_COND_V_MSG(original_path_size < min_base_size + suffix_size, FAILED, vformat("The original PDB path size in bytes is too small: '%s'. Expected size: %d or more bytes, but available %d.", pdb_info.path, min_base_size + suffix_size, original_path_size));
+
+			utf8_name.resize(original_path_size - suffix_size + 1); // +1 for the \0
+			utf8_name[utf8_name.size() - 1] = '\0';
+			new_pdb_base_name.parse_utf8(utf8_name);
+			new_pdb_base_name[new_pdb_base_name.length() - 1] = '_'; // Restore the last '_'
+			WARN_PRINT(vformat("The original path size of '%s' in bytes was too small to fit the new name, so it was shortened to '%s%d.pdb'.", pdb_info.path, new_pdb_base_name, max_pdb_names - 1));
+		}
+	}
+
+	// Delete old PDB files.
+	for (const String &file : DirAccess::get_files_at(dll_base_dir)) {
+		if (file.begins_with(new_pdb_base_name) && file.ends_with(".pdb")) {
+			String path = dll_base_dir.path_join(file);
+
+			// Just try to delete without showing any errors.
+			Error err = DirAccess::remove_absolute(path);
+			if (err == OK && temp_pdbs[p_dll_path].has(path)) {
+				temp_pdbs[p_dll_path].erase(path);
+			}
+		}
+	}
+
+	// Try to copy PDB with new name and patch DLL.
+	Ref<DirAccess> d = DirAccess::create(DirAccess::ACCESS_FILESYSTEM);
+	for (int i = 0; i < max_pdb_names; i++) {
+		String new_pdb_name = vformat("%s%d.pdb", new_pdb_base_name, i);
+		String new_pdb_path = dll_base_dir.path_join(new_pdb_name);
+		Error err = OK;
+
+		Ref<FileAccess> test_pdb_is_locked = FileAccess::open(new_pdb_path, FileAccess::READ_WRITE, &err);
+		if (err == ERR_FILE_CANT_OPEN) {
+			// If the file is blocked, continue searching.
+			continue;
+		} else if (err != OK && err != ERR_FILE_NOT_FOUND) {
+			ERR_FAIL_V_MSG(err, vformat("Failed to open '%s' to check if it is locked.", new_pdb_path));
+		}
+
+		err = d->copy(copy_pdb_path, new_pdb_path);
+		ERR_FAIL_COND_V_MSG(err != OK, err, vformat("Failed to copy PDB from '%s' to '%s'.", copy_pdb_path, new_pdb_path));
+		temp_pdbs[p_dll_path].append(new_pdb_path);
+
+		Ref<FileAccess> file = FileAccess::open(p_dll_path, FileAccess::READ_WRITE, &err);
+		ERR_FAIL_COND_V_MSG(err != OK, err, vformat("Failed to open '%s' to patch the PDB path.", p_dll_path));
+
+		int original_path_size = pdb_info.path.utf8().length();
+		// Double-check file bounds.
+		ERR_FAIL_INDEX_V_MSG(pdb_info.address + original_path_size, file->get_length(), FAILED, vformat("Failed to write a new PDB path. Probably '%s' has been changed.", p_dll_path));
+
+		Vector<uint8_t> u8 = new_pdb_name.to_utf8_buffer();
+		file->seek(pdb_info.address);
+		file->store_buffer(u8);
+
+		// Terminate string and fill the remaining part of the original string with the '\0'.
+		// Can be replaced by file->store_8('\0');
+		Vector<uint8_t> padding_buffer;
+		padding_buffer.resize((int64_t)original_path_size - u8.size());
+		padding_buffer.fill('\0');
+		file->store_buffer(padding_buffer);
+		ERR_FAIL_COND_V_MSG(err != OK, err, vformat("Failed to write a new PDB path to '%s'.", p_dll_path));
+
+		return OK;
+	}
+
+	ERR_FAIL_V_MSG(FAILED, vformat("Failed to find an unblocked PDB name for '%s' among %d files.", p_dll_path, max_pdb_names));
+#else
+	WARN_PRINT_ONCE("Renaming PDB files is only available in debug builds. If your libraries use PDB files, then the original ones will be used.");
+	return ERR_SKIP;
+#endif
+}
+
+void WindowsUtils::remove_temp_pdbs(const String &p_dll_path) {
+#ifdef DEBUG_ENABLED
+	if (temp_pdbs.has(p_dll_path)) {
+		Vector<String> removed;
+		int failed = 0;
+		const int failed_limit = 10;
+		for (const String &pdb : temp_pdbs[p_dll_path]) {
+			if (FileAccess::exists(pdb)) {
+				Error err = DirAccess::remove_absolute(pdb);
+				if (err == OK) {
+					removed.append(pdb);
+				} else {
+					failed++;
+					if (failed <= failed_limit) {
+						print_verbose("Failed to remove temp PDB: " + pdb);
+					}
+				}
+			} else {
+				removed.append(pdb);
+			}
+		}
+
+		if (failed > failed_limit) {
+			print_verbose(vformat("And %d more PDB files could not be removed....", failed - failed_limit));
+		}
+
+		for (const String &pdb : removed) {
+			temp_pdbs[p_dll_path].erase(pdb);
+		}
+	}
+#endif
+}
+
+#endif // WINDOWS_ENABLED
diff --git a/scene/main/node.compat.inc b/platform/windows/windows_utils.h
index 69ece1a40d..3f5d75294e 100644
--- a/scene/main/node.compat.inc
+++ b/platform/windows/windows_utils.h
@@ -1,5 +1,5 @@
 /**************************************************************************/
-/*  node.compat.inc                                                       */
+/*  windows_utils.h                                                       */
 /**************************************************************************/
 /*                         This file is part of:                          */
 /*                             GODOT ENGINE                               */
@@ -28,14 +28,22 @@
 /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
 /**************************************************************************/
 
-#ifndef DISABLE_DEPRECATED
+#ifndef WINDOWS_UTILS_H
+#define WINDOWS_UTILS_H
 
-void Node::_replace_by_bind_compat_89992(Node *p_node, bool p_keep_data) {
-	replace_by(p_node, p_keep_data, true);
-}
+#ifdef WINDOWS_ENABLED
 
-void Node::_bind_compatibility_methods() {
-	ClassDB::bind_compatibility_method(D_METHOD("replace_by", "node", "keep_groups"), &Node::_replace_by_bind_compat_89992, DEFVAL(false));
-}
+#include "core/string/ustring.h"
+#include "core/templates/hash_map.h"
 
-#endif
+class WindowsUtils {
+	static HashMap<String, Vector<String>> temp_pdbs;
+
+public:
+	static Error copy_and_rename_pdb(const String &p_dll_path);
+	static void remove_temp_pdbs(const String &p_dll_path);
+};
+
+#endif // WINDOWS_ENABLED
+
+#endif // WINDOWS_UTILS_H
diff --git a/scene/2d/light_2d.cpp b/scene/2d/light_2d.cpp
index 79732d7861..5ce26b3ed4 100644
--- a/scene/2d/light_2d.cpp
+++ b/scene/2d/light_2d.cpp
@@ -203,7 +203,7 @@ void Light2D::_physics_interpolated_changed() {
 
 void Light2D::_notification(int p_what) {
 	switch (p_what) {
-		case NOTIFICATION_ENTER_TREE: {
+		case NOTIFICATION_ENTER_CANVAS: {
 			RS::get_singleton()->canvas_light_attach_to_canvas(canvas_light, get_canvas());
 			_update_light_visibility();
 		} break;
@@ -227,7 +227,7 @@ void Light2D::_notification(int p_what) {
 			}
 		} break;
 
-		case NOTIFICATION_EXIT_TREE: {
+		case NOTIFICATION_EXIT_CANVAS: {
 			RS::get_singleton()->canvas_light_attach_to_canvas(canvas_light, RID());
 			_update_light_visibility();
 		} break;
diff --git a/scene/2d/marker_2d.cpp b/scene/2d/marker_2d.cpp
index 39026591b0..b1b9705bf8 100644
--- a/scene/2d/marker_2d.cpp
+++ b/scene/2d/marker_2d.cpp
@@ -48,7 +48,7 @@ void Marker2D::_draw_cross() {
 	// Use a darkened axis color for the negative axis.
 	// This makes it possible to see in which direction the Marker3D node is rotated
 	// (which can be important depending on how it's used).
-	// Axis colors are taken from `axis_x_color` and `axis_y_color` (defined in `editor/editor_themes.cpp`).
+	// Axis colors are taken from `axis_x_color` and `axis_y_color` (defined in `editor/themes/editor_theme_manager.cpp`).
 	const Color color_x = Color(0.96, 0.20, 0.32);
 	const Color color_y = Color(0.53, 0.84, 0.01);
 	PackedColorArray colors = {
diff --git a/scene/2d/physics/joints/joint_2d.cpp b/scene/2d/physics/joints/joint_2d.cpp
index dd1697a29c..1afac7c150 100644
--- a/scene/2d/physics/joints/joint_2d.cpp
+++ b/scene/2d/physics/joints/joint_2d.cpp
@@ -136,7 +136,7 @@ void Joint2D::set_node_a(const NodePath &p_node_a) {
 	if (Engine::get_singleton()->is_editor_hint()) {
 		// When in editor, the setter may be called as a result of node rename.
 		// It happens before the node actually changes its name, which triggers false warning.
-		callable_mp(this, &Joint2D::_update_joint).call_deferred();
+		callable_mp(this, &Joint2D::_update_joint).call_deferred(false);
 	} else {
 		_update_joint();
 	}
@@ -157,7 +157,7 @@ void Joint2D::set_node_b(const NodePath &p_node_b) {
 
 	b = p_node_b;
 	if (Engine::get_singleton()->is_editor_hint()) {
-		callable_mp(this, &Joint2D::_update_joint).call_deferred();
+		callable_mp(this, &Joint2D::_update_joint).call_deferred(false);
 	} else {
 		_update_joint();
 	}
diff --git a/scene/3d/mesh_instance_3d.cpp b/scene/3d/mesh_instance_3d.cpp
index 8635240655..616fb18d53 100644
--- a/scene/3d/mesh_instance_3d.cpp
+++ b/scene/3d/mesh_instance_3d.cpp
@@ -204,6 +204,10 @@ Ref<Skin> MeshInstance3D::get_skin() const {
 	return skin;
 }
 
+Ref<SkinReference> MeshInstance3D::get_skin_reference() const {
+	return skin_ref;
+}
+
 void MeshInstance3D::set_skeleton_path(const NodePath &p_skeleton) {
 	skeleton_path = p_skeleton;
 	if (!is_inside_tree()) {
@@ -518,6 +522,7 @@ void MeshInstance3D::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("get_skeleton_path"), &MeshInstance3D::get_skeleton_path);
 	ClassDB::bind_method(D_METHOD("set_skin", "skin"), &MeshInstance3D::set_skin);
 	ClassDB::bind_method(D_METHOD("get_skin"), &MeshInstance3D::get_skin);
+	ClassDB::bind_method(D_METHOD("get_skin_reference"), &MeshInstance3D::get_skin_reference);
 
 	ClassDB::bind_method(D_METHOD("get_surface_override_material_count"), &MeshInstance3D::get_surface_override_material_count);
 	ClassDB::bind_method(D_METHOD("set_surface_override_material", "surface", "material"), &MeshInstance3D::set_surface_override_material);
diff --git a/scene/3d/mesh_instance_3d.h b/scene/3d/mesh_instance_3d.h
index add6bfe15e..d6ae1291d3 100644
--- a/scene/3d/mesh_instance_3d.h
+++ b/scene/3d/mesh_instance_3d.h
@@ -75,6 +75,8 @@ public:
 	void set_skeleton_path(const NodePath &p_skeleton);
 	NodePath get_skeleton_path();
 
+	Ref<SkinReference> get_skin_reference() const;
+
 	int get_blend_shape_count() const;
 	int find_blend_shape_by_name(const StringName &p_name);
 	float get_blend_shape_value(int p_blend_shape) const;
diff --git a/scene/3d/reflection_probe.cpp b/scene/3d/reflection_probe.cpp
index b4dd6d09be..b6ec55286d 100644
--- a/scene/3d/reflection_probe.cpp
+++ b/scene/3d/reflection_probe.cpp
@@ -190,17 +190,6 @@ AABB ReflectionProbe::get_aabb() const {
 	return aabb;
 }
 
-PackedStringArray ReflectionProbe::get_configuration_warnings() const {
-	PackedStringArray warnings = Node::get_configuration_warnings();
-
-	if (OS::get_singleton()->get_current_rendering_method() == "gl_compatibility") {
-		warnings.push_back(RTR("ReflectionProbes are not supported when using the GL Compatibility backend yet. Support will be added in a future release."));
-		return warnings;
-	}
-
-	return warnings;
-}
-
 void ReflectionProbe::_validate_property(PropertyInfo &p_property) const {
 	if (p_property.name == "ambient_color" || p_property.name == "ambient_color_energy") {
 		if (ambient_mode != AMBIENT_COLOR) {
diff --git a/scene/3d/reflection_probe.h b/scene/3d/reflection_probe.h
index 425fbb5bc2..7221294228 100644
--- a/scene/3d/reflection_probe.h
+++ b/scene/3d/reflection_probe.h
@@ -122,8 +122,6 @@ public:
 
 	virtual AABB get_aabb() const override;
 
-	virtual PackedStringArray get_configuration_warnings() const override;
-
 	ReflectionProbe();
 	~ReflectionProbe();
 };
diff --git a/scene/3d/skeleton_3d.cpp b/scene/3d/skeleton_3d.cpp
index 566801c9f7..95e94de0e3 100644
--- a/scene/3d/skeleton_3d.cpp
+++ b/scene/3d/skeleton_3d.cpp
@@ -32,6 +32,7 @@
 #include "skeleton_3d.compat.inc"
 
 #include "core/variant/type_info.h"
+#include "scene/3d/skeleton_modifier_3d.h"
 #include "scene/resources/surface_tool.h"
 #include "scene/scene_string_names.h"
 #ifndef DISABLE_DEPRECATED
diff --git a/scene/gui/file_dialog.cpp b/scene/gui/file_dialog.cpp
index 3816b337b8..12b2364ddf 100644
--- a/scene/gui/file_dialog.cpp
+++ b/scene/gui/file_dialog.cpp
@@ -170,6 +170,20 @@ void FileDialog::_validate_property(PropertyInfo &p_property) const {
 
 void FileDialog::_notification(int p_what) {
 	switch (p_what) {
+		case NOTIFICATION_READY: {
+#ifdef TOOLS_ENABLED
+			if (is_part_of_edited_scene()) {
+				return;
+			}
+#endif
+
+			// Replace the built-in dialog with the native one if it started visible.
+			if (is_visible() && DisplayServer::get_singleton()->has_feature(DisplayServer::FEATURE_NATIVE_DIALOG_FILE) && (use_native_dialog || OS::get_singleton()->is_sandboxed())) {
+				ConfirmationDialog::set_visible(false);
+				_native_popup();
+			}
+		} break;
+
 		case NOTIFICATION_VISIBILITY_CHANGED: {
 			if (!is_visible()) {
 				set_process_shortcut_input(false);
@@ -1392,6 +1406,18 @@ void FileDialog::set_default_show_hidden_files(bool p_show) {
 
 void FileDialog::set_use_native_dialog(bool p_native) {
 	use_native_dialog = p_native;
+
+#ifdef TOOLS_ENABLED
+	if (is_part_of_edited_scene()) {
+		return;
+	}
+#endif
+
+	// Replace the built-in dialog with the native one if it's currently visible.
+	if (is_visible() && DisplayServer::get_singleton()->has_feature(DisplayServer::FEATURE_NATIVE_DIALOG_FILE) && (use_native_dialog || OS::get_singleton()->is_sandboxed())) {
+		ConfirmationDialog::set_visible(false);
+		_native_popup();
+	}
 }
 
 bool FileDialog::get_use_native_dialog() const {
diff --git a/scene/gui/rich_text_label.cpp b/scene/gui/rich_text_label.cpp
index 1baf71dd07..f8bbedde09 100644
--- a/scene/gui/rich_text_label.cpp
+++ b/scene/gui/rich_text_label.cpp
@@ -835,10 +835,13 @@ int RichTextLabel::_draw_line(ItemFrame *p_frame, int p_line, const Vector2 &p_o
 	// Draw dropcap.
 	int dc_lines = l.text_buf->get_dropcap_lines();
 	float h_off = l.text_buf->get_dropcap_size().x;
-	if (l.dc_ol_size > 0) {
-		l.text_buf->draw_dropcap_outline(ci, p_ofs + ((rtl) ? Vector2() : Vector2(l.offset.x, 0)), l.dc_ol_size, l.dc_ol_color);
+	bool skip_dc = (trim_chars && l.char_offset > visible_characters) || (trim_glyphs_ltr && (r_processed_glyphs >= visible_glyphs)) || (trim_glyphs_rtl && (r_processed_glyphs < total_glyphs - visible_glyphs));
+	if (!skip_dc) {
+		if (l.dc_ol_size > 0) {
+			l.text_buf->draw_dropcap_outline(ci, p_ofs + ((rtl) ? Vector2() : Vector2(l.offset.x, 0)), l.dc_ol_size, l.dc_ol_color);
+		}
+		l.text_buf->draw_dropcap(ci, p_ofs + ((rtl) ? Vector2() : Vector2(l.offset.x, 0)), l.dc_color);
 	}
-	l.text_buf->draw_dropcap(ci, p_ofs + ((rtl) ? Vector2() : Vector2(l.offset.x, 0)), l.dc_color);
 
 	int line_count = 0;
 	Size2 ctrl_size = get_size();
@@ -894,7 +897,8 @@ int RichTextLabel::_draw_line(ItemFrame *p_frame, int p_line, const Vector2 &p_o
 			} break;
 		}
 
-		if (!prefix.is_empty() && line == 0) {
+		bool skip_prefix = (visible_chars_behavior == TextServer::VC_CHARS_BEFORE_SHAPING && l.char_offset == visible_characters) || (trim_chars && l.char_offset > visible_characters) || (trim_glyphs_ltr && (r_processed_glyphs >= visible_glyphs)) || (trim_glyphs_rtl && (r_processed_glyphs < total_glyphs - visible_glyphs));
+		if (!prefix.is_empty() && line == 0 && !skip_prefix) {
 			Ref<Font> font = theme_cache.normal_font;
 			int font_size = theme_cache.normal_font_size;
 
diff --git a/scene/gui/tab_container.cpp b/scene/gui/tab_container.cpp
index df3c9631f9..105f4484b2 100644
--- a/scene/gui/tab_container.cpp
+++ b/scene/gui/tab_container.cpp
@@ -500,6 +500,13 @@ void TabContainer::_on_tab_visibility_changed(Control *p_child) {
 	updating_visibility = false;
 }
 
+void TabContainer::_refresh_tab_indices() {
+	Vector<Control *> controls = _get_tab_controls();
+	for (int i = 0; i < controls.size(); i++) {
+		controls[i]->set_meta("_tab_index", i);
+	}
+}
+
 void TabContainer::_refresh_tab_names() {
 	Vector<Control *> controls = _get_tab_controls();
 	for (int i = 0; i < controls.size(); i++) {
@@ -523,6 +530,7 @@ void TabContainer::add_child_notify(Node *p_child) {
 	c->hide();
 
 	tab_bar->add_tab(p_child->get_name());
+	c->set_meta("_tab_index", tab_bar->get_tab_count() - 1);
 
 	_update_margins();
 	if (get_tab_count() == 1) {
@@ -547,19 +555,10 @@ void TabContainer::move_child_notify(Node *p_child) {
 
 	Control *c = Object::cast_to<Control>(p_child);
 	if (c && !c->is_set_as_top_level()) {
-		int old_idx = -1;
-		String tab_name = String(c->get_meta("_tab_name", c->get_name()));
-
-		// Find the previous tab index of the control.
-		for (int i = 0; i < get_tab_count(); i++) {
-			if (get_tab_title(i) == tab_name) {
-				old_idx = i;
-				break;
-			}
-		}
-
-		tab_bar->move_tab(old_idx, get_tab_idx_from_control(c));
+		tab_bar->move_tab(c->get_meta("_tab_index"), get_tab_idx_from_control(c));
 	}
+
+	_refresh_tab_indices();
 }
 
 void TabContainer::remove_child_notify(Node *p_child) {
@@ -578,7 +577,10 @@ void TabContainer::remove_child_notify(Node *p_child) {
 
 	// As the child hasn't been removed yet, keep track of it so when the "tab_changed" signal is fired it can be ignored.
 	children_removing.push_back(c);
+
 	tab_bar->remove_tab(idx);
+	_refresh_tab_indices();
+
 	children_removing.erase(c);
 
 	_update_margins();
@@ -586,6 +588,7 @@ void TabContainer::remove_child_notify(Node *p_child) {
 		queue_redraw();
 	}
 
+	p_child->remove_meta("_tab_index");
 	p_child->remove_meta("_tab_name");
 	p_child->disconnect("renamed", callable_mp(this, &TabContainer::_refresh_tab_names));
 	p_child->disconnect(SNAME("visibility_changed"), callable_mp(this, &TabContainer::_on_tab_visibility_changed));
diff --git a/scene/gui/tab_container.h b/scene/gui/tab_container.h
index 03eff5d944..8645a6d14e 100644
--- a/scene/gui/tab_container.h
+++ b/scene/gui/tab_container.h
@@ -101,6 +101,7 @@ private:
 	Vector<Control *> _get_tab_controls() const;
 	void _on_theme_changed();
 	void _repaint();
+	void _refresh_tab_indices();
 	void _refresh_tab_names();
 	void _update_margins();
 	void _on_mouse_exited();
diff --git a/scene/gui/text_edit.cpp b/scene/gui/text_edit.cpp
index 6e31671475..6bb745ac57 100644
--- a/scene/gui/text_edit.cpp
+++ b/scene/gui/text_edit.cpp
@@ -2582,8 +2582,6 @@ void TextEdit::_move_caret_to_line_end(bool p_select) {
 			set_caret_column(row_end_col, i == 0, i);
 		}
 
-		carets.write[i].last_fit_x = INT_MAX;
-
 		if (p_select) {
 			_post_shift_selection(i);
 		}
diff --git a/scene/main/multiplayer_api.cpp b/scene/main/multiplayer_api.cpp
index 35e4302bb7..bd097ec2d0 100644
--- a/scene/main/multiplayer_api.cpp
+++ b/scene/main/multiplayer_api.cpp
@@ -266,10 +266,6 @@ Error MultiplayerAPI::decode_and_decompress_variants(Vector<Variant> &r_variants
 		return OK;
 	}
 
-	Vector<Variant> args;
-	Vector<const Variant *> argp;
-	args.resize(argc);
-
 	for (int i = 0; i < argc; i++) {
 		ERR_FAIL_COND_V_MSG(r_len >= p_len, ERR_INVALID_DATA, "Invalid packet received. Size too small.");
 
diff --git a/scene/main/node.cpp b/scene/main/node.cpp
index beb2583b61..5c5049759f 100644
--- a/scene/main/node.cpp
+++ b/scene/main/node.cpp
@@ -29,7 +29,6 @@
 /**************************************************************************/
 
 #include "node.h"
-#include "node.compat.inc"
 
 #include "core/config/project_settings.h"
 #include "core/core_string_names.h"
@@ -3006,7 +3005,7 @@ static void find_owned_by(Node *p_by, Node *p_node, List<Node *> *p_owned) {
 	}
 }
 
-void Node::replace_by(Node *p_node, bool p_keep_groups, bool p_keep_children) {
+void Node::replace_by(Node *p_node, bool p_keep_groups) {
 	ERR_THREAD_GUARD
 	ERR_FAIL_NULL(p_node);
 	ERR_FAIL_COND(p_node->data.parent);
@@ -3027,13 +3026,13 @@ void Node::replace_by(Node *p_node, bool p_keep_groups, bool p_keep_children) {
 	_replace_connections_target(p_node);
 
 	if (data.owner) {
-		if (p_keep_children) {
-			for (int i = 0; i < get_child_count(); i++) {
-				find_owned_by(data.owner, get_child(i), &owned_by_owner);
-			}
+		for (int i = 0; i < get_child_count(); i++) {
+			find_owned_by(data.owner, get_child(i), &owned_by_owner);
 		}
+
 		_clean_up_owner();
 	}
+
 	Node *parent = data.parent;
 	int index_in_parent = get_index(false);
 
@@ -3045,33 +3044,31 @@ void Node::replace_by(Node *p_node, bool p_keep_groups, bool p_keep_children) {
 
 	emit_signal(SNAME("replacing_by"), p_node);
 
-	if (p_keep_children) {
-		while (get_child_count()) {
-			Node *child = get_child(0);
-			remove_child(child);
-			if (!child->is_owned_by_parent()) {
-				// add the custom children to the p_node
-				Node *child_owner = child->get_owner() == this ? p_node : child->get_owner();
-				child->set_owner(nullptr);
-				p_node->add_child(child);
-				child->set_owner(child_owner);
-			}
+	while (get_child_count()) {
+		Node *child = get_child(0);
+		remove_child(child);
+		if (!child->is_owned_by_parent()) {
+			// add the custom children to the p_node
+			Node *child_owner = child->get_owner() == this ? p_node : child->get_owner();
+			child->set_owner(nullptr);
+			p_node->add_child(child);
+			child->set_owner(child_owner);
 		}
+	}
 
-		for (Node *E : owned) {
-			if (E->data.owner != p_node) {
-				E->set_owner(p_node);
-			}
+	p_node->set_owner(owner);
+	for (Node *E : owned) {
+		if (E->data.owner != p_node) {
+			E->set_owner(p_node);
 		}
+	}
 
-		for (Node *E : owned_by_owner) {
-			if (E->data.owner != owner) {
-				E->set_owner(owner);
-			}
+	for (Node *E : owned_by_owner) {
+		if (E->data.owner != owner) {
+			E->set_owner(owner);
 		}
 	}
 
-	p_node->set_owner(owner);
 	p_node->set_scene_file_path(get_scene_file_path());
 }
 
@@ -3598,7 +3595,7 @@ void Node::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("create_tween"), &Node::create_tween);
 
 	ClassDB::bind_method(D_METHOD("duplicate", "flags"), &Node::duplicate, DEFVAL(DUPLICATE_USE_INSTANTIATION | DUPLICATE_SIGNALS | DUPLICATE_GROUPS | DUPLICATE_SCRIPTS));
-	ClassDB::bind_method(D_METHOD("replace_by", "node", "keep_groups", "keep_children"), &Node::replace_by, DEFVAL(false), DEFVAL(true));
+	ClassDB::bind_method(D_METHOD("replace_by", "node", "keep_groups"), &Node::replace_by, DEFVAL(false));
 
 	ClassDB::bind_method(D_METHOD("set_scene_instance_load_placeholder", "load_placeholder"), &Node::set_scene_instance_load_placeholder);
 	ClassDB::bind_method(D_METHOD("get_scene_instance_load_placeholder"), &Node::get_scene_instance_load_placeholder);
diff --git a/scene/main/node.h b/scene/main/node.h
index 99def10338..f49eeec9cd 100644
--- a/scene/main/node.h
+++ b/scene/main/node.h
@@ -310,11 +310,6 @@ private:
 	Variant _call_thread_safe_bind(const Variant **p_args, int p_argcount, Callable::CallError &r_error);
 
 protected:
-#ifndef DISABLE_DEPRECATED
-	void _replace_by_bind_compat_89992(Node *p_node, bool p_keep_data = false);
-	static void _bind_compatibility_methods();
-#endif // DISABLE_DEPRECATED
-
 	void _block() { data.blocked++; }
 	void _unblock() { data.blocked--; }
 
@@ -634,7 +629,7 @@ public:
 		return binds;
 	}
 
-	void replace_by(Node *p_node, bool p_keep_groups = false, bool p_keep_children = true);
+	void replace_by(Node *p_node, bool p_keep_data = false);
 
 	void set_process_mode(ProcessMode p_mode);
 	ProcessMode get_process_mode() const;
diff --git a/scene/resources/2d/tile_set.cpp b/scene/resources/2d/tile_set.cpp
index 6ae97944e0..04b00b6bdb 100644
--- a/scene/resources/2d/tile_set.cpp
+++ b/scene/resources/2d/tile_set.cpp
@@ -6805,8 +6805,20 @@ void TileData::_get_property_list(List<PropertyInfo> *p_list) const {
 		// Physics layers.
 		p_list->push_back(PropertyInfo(Variant::NIL, GNAME("Physics", ""), PROPERTY_HINT_NONE, "", PROPERTY_USAGE_GROUP));
 		for (int i = 0; i < physics.size(); i++) {
-			p_list->push_back(PropertyInfo(Variant::VECTOR2, vformat("physics_layer_%d/%s", i, PNAME("linear_velocity")), PROPERTY_HINT_NONE));
-			p_list->push_back(PropertyInfo(Variant::FLOAT, vformat("physics_layer_%d/%s", i, PNAME("angular_velocity")), PROPERTY_HINT_NONE));
+			// physics_layer_%d/linear_velocity
+			property_info = PropertyInfo(Variant::VECTOR2, vformat("physics_layer_%d/%s", i, PNAME("linear_velocity")), PROPERTY_HINT_NONE);
+			if (physics[i].linear_velocity == Vector2()) {
+				property_info.usage ^= PROPERTY_USAGE_STORAGE;
+			}
+			p_list->push_back(property_info);
+
+			// physics_layer_%d/angular_velocity
+			property_info = PropertyInfo(Variant::FLOAT, vformat("physics_layer_%d/%s", i, PNAME("angular_velocity")), PROPERTY_HINT_NONE);
+			if (physics[i].angular_velocity == 0.0) {
+				property_info.usage ^= PROPERTY_USAGE_STORAGE;
+			}
+			p_list->push_back(property_info);
+
 			p_list->push_back(PropertyInfo(Variant::INT, vformat("physics_layer_%d/%s", i, PNAME("polygons_count")), PROPERTY_HINT_NONE, "", PROPERTY_USAGE_EDITOR));
 
 			for (int j = 0; j < physics[i].polygons.size(); j++) {
diff --git a/scene/resources/resource_format_text.cpp b/scene/resources/resource_format_text.cpp
index 6d0796f1b9..07868e7e49 100644
--- a/scene/resources/resource_format_text.cpp
+++ b/scene/resources/resource_format_text.cpp
@@ -39,7 +39,8 @@
 
 // Version 2: changed names for Basis, AABB, Vectors, etc.
 // Version 3: new string ID for ext/subresources, breaks forward compat.
-#define FORMAT_VERSION 3
+// Version 4: PackedByteArray is now stored as base64 encoded.
+#define FORMAT_VERSION 4
 
 #define BINARY_FORMAT_VERSION 4
 
@@ -273,8 +274,8 @@ Ref<PackedScene> ResourceLoaderText::_parse_node_tag(VariantParser::ResourcePars
 
 			if (next_tag.fields.has("groups")) {
 				Array groups = next_tag.fields["groups"];
-				for (int i = 0; i < groups.size(); i++) {
-					packed_scene->get_state()->add_node_group(node_id, packed_scene->get_state()->add_name(groups[i]));
+				for (const Variant &group : groups) {
+					packed_scene->get_state()->add_node_group(node_id, packed_scene->get_state()->add_name(group));
 				}
 			}
 
@@ -352,8 +353,8 @@ Ref<PackedScene> ResourceLoaderText::_parse_node_tag(VariantParser::ResourcePars
 			}
 
 			Vector<int> bind_ints;
-			for (int i = 0; i < binds.size(); i++) {
-				bind_ints.push_back(packed_scene->get_state()->add_value(binds[i]));
+			for (const Variant &bind : binds) {
+				bind_ints.push_back(packed_scene->get_state()->add_value(bind));
 			}
 
 			packed_scene->get_state()->add_connection(
@@ -1951,10 +1952,9 @@ void ResourceFormatSaverTextInstance::_find_resources(const Variant &p_variant,
 		} break;
 		case Variant::ARRAY: {
 			Array varray = p_variant;
-			int len = varray.size();
-			for (int i = 0; i < len; i++) {
-				const Variant &v = varray.get(i);
-				_find_resources(v);
+			_find_resources(varray.get_typed_script());
+			for (const Variant &var : varray) {
+				_find_resources(var);
 			}
 
 		} break;
diff --git a/scu_builders.py b/scu_builders.py
index b180cbc864..e6adf6543c 100644
--- a/scu_builders.py
+++ b/scu_builders.py
@@ -23,8 +23,9 @@ def clear_out_stale_files(output_folder, extension, fresh_files):
         return
 
     for file in glob.glob(output_folder + "/*." + extension):
+        file = Path(file)
         if not file in fresh_files:
-            # print("removed stale file: " + file)
+            # print("removed stale file: " + str(file))
             os.remove(file)
 
 
@@ -97,7 +98,7 @@ def write_output_file(file_count, include_list, start_line, end_line, output_fol
     elif _verbose:
         print("SCU: Generation not needed for: " + short_filename)
 
-    return output_filename
+    return output_path
 
 
 def write_exception_output_file(file_count, exception_string, output_folder, output_filename_prefix, extension):
@@ -124,7 +125,7 @@ def write_exception_output_file(file_count, exception_string, output_folder, out
     elif _verbose:
         print("SCU: Generation not needed for: " + short_filename)
 
-    return output_filename
+    return output_path
 
 
 def find_section_name(sub_folder):
diff --git a/servers/rendering/dummy/storage/light_storage.h b/servers/rendering/dummy/storage/light_storage.h
index a76305cdaa..0a9602b603 100644
--- a/servers/rendering/dummy/storage/light_storage.h
+++ b/servers/rendering/dummy/storage/light_storage.h
@@ -91,6 +91,7 @@ public:
 	void light_instance_set_aabb(RID p_light_instance, const AABB &p_aabb) override {}
 	void light_instance_set_shadow_transform(RID p_light_instance, const Projection &p_projection, const Transform3D &p_transform, float p_far, float p_split, int p_pass, float p_shadow_texel_size, float p_bias_scale = 1.0, float p_range_begin = 0, const Vector2 &p_uv_scale = Vector2()) override {}
 	void light_instance_mark_visible(RID p_light_instance) override {}
+	virtual bool light_instance_is_shadow_visible_at_position(RID p_light_instance, const Vector3 &p_position) const override { return false; }
 
 	/* PROBE API */
 	virtual RID reflection_probe_allocate() override { return RID(); }
diff --git a/servers/rendering/renderer_rd/storage_rd/light_storage.h b/servers/rendering/renderer_rd/storage_rd/light_storage.h
index b3d6bf5254..f152cc5dae 100644
--- a/servers/rendering/renderer_rd/storage_rd/light_storage.h
+++ b/servers/rendering/renderer_rd/storage_rd/light_storage.h
@@ -590,6 +590,29 @@ public:
 	virtual void light_instance_set_shadow_transform(RID p_light_instance, const Projection &p_projection, const Transform3D &p_transform, float p_far, float p_split, int p_pass, float p_shadow_texel_size, float p_bias_scale = 1.0, float p_range_begin = 0, const Vector2 &p_uv_scale = Vector2()) override;
 	virtual void light_instance_mark_visible(RID p_light_instance) override;
 
+	virtual bool light_instance_is_shadow_visible_at_position(RID p_light_instance, const Vector3 &p_position) const override {
+		const LightInstance *light_instance = light_instance_owner.get_or_null(p_light_instance);
+		ERR_FAIL_NULL_V(light_instance, false);
+		const Light *light = light_owner.get_or_null(light_instance->light);
+		ERR_FAIL_NULL_V(light, false);
+
+		if (!light->shadow) {
+			return false;
+		}
+
+		if (!light->distance_fade) {
+			return true;
+		}
+
+		real_t distance = p_position.distance_to(light_instance->transform.origin);
+
+		if (distance > light->distance_fade_shadow + light->distance_fade_length) {
+			return false;
+		}
+
+		return true;
+	}
+
 	_FORCE_INLINE_ RID light_instance_get_base_light(RID p_light_instance) {
 		LightInstance *li = light_instance_owner.get_or_null(p_light_instance);
 		return li->light;
diff --git a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp
index c5d74d395f..b7934cb3de 100644
--- a/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp
+++ b/servers/rendering/renderer_rd/storage_rd/render_scene_buffers_rd.cpp
@@ -130,9 +130,10 @@ void RenderSceneBuffersRD::cleanup() {
 	named_textures.clear();
 
 	// Clear weight_buffer / blur textures.
-	for (const WeightBuffers &weight_buffer : weight_buffers) {
+	for (WeightBuffers &weight_buffer : weight_buffers) {
 		if (weight_buffer.weight.is_valid()) {
 			RD::get_singleton()->free(weight_buffer.weight);
+			weight_buffer.weight = RID();
 		}
 	}
 }
diff --git a/servers/rendering/renderer_scene_cull.cpp b/servers/rendering/renderer_scene_cull.cpp
index b33de9d6f4..96c0479ac3 100644
--- a/servers/rendering/renderer_scene_cull.cpp
+++ b/servers/rendering/renderer_scene_cull.cpp
@@ -1029,7 +1029,6 @@ inline bool is_geometry_instance(RenderingServer::InstanceType p_type) {
 void RendererSceneCull::instance_set_custom_aabb(RID p_instance, AABB p_aabb) {
 	Instance *instance = instance_owner.get_or_null(p_instance);
 	ERR_FAIL_NULL(instance);
-	ERR_FAIL_COND(!is_geometry_instance(instance->base_type));
 
 	if (p_aabb != AABB()) {
 		// Set custom AABB
@@ -3029,6 +3028,7 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c
 	light_culler->prepare_camera(p_camera_data->main_transform, p_camera_data->main_projection);
 
 	Scenario *scenario = scenario_owner.get_or_null(p_scenario);
+	Vector3 camera_position = p_camera_data->main_transform.origin;
 
 	ERR_FAIL_COND(p_render_buffers.is_null());
 
@@ -3038,7 +3038,7 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c
 
 	if (p_reflection_probe.is_null()) {
 		//no rendering code here, this is only to set up what needs to be done, request regions, etc.
-		scene_render->sdfgi_update(p_render_buffers, p_environment, p_camera_data->main_transform.origin); //update conditions for SDFGI (whether its used or not)
+		scene_render->sdfgi_update(p_render_buffers, p_environment, camera_position); //update conditions for SDFGI (whether its used or not)
 	}
 
 	RENDER_TIMESTAMP("Update Visibility Dependencies");
@@ -3051,7 +3051,7 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c
 		VisibilityCullData visibility_cull_data;
 		visibility_cull_data.scenario = scenario;
 		visibility_cull_data.viewport_mask = scenario->viewport_visibility_masks[p_viewport];
-		visibility_cull_data.camera_position = p_camera_data->main_transform.origin;
+		visibility_cull_data.camera_position = camera_position;
 
 		for (int i = scenario->instance_visibility.get_bin_count() - 1; i > 0; i--) { // We skip bin 0
 			visibility_cull_data.cull_offset = scenario->instance_visibility.get_bin_start(i);
@@ -3220,16 +3220,20 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c
 			}
 		}
 
-		// Positional Shadowss
+		// Positional Shadows
 		for (uint32_t i = 0; i < (uint32_t)scene_cull_result.lights.size(); i++) {
 			Instance *ins = scene_cull_result.lights[i];
 
-			if (!p_shadow_atlas.is_valid() || !RSG::light_storage->light_has_shadow(ins->base)) {
+			if (!p_shadow_atlas.is_valid()) {
 				continue;
 			}
 
 			InstanceLightData *light = static_cast<InstanceLightData *>(ins->base_data);
 
+			if (!RSG::light_storage->light_instance_is_shadow_visible_at_position(light->instance, camera_position)) {
+				continue;
+			}
+
 			float coverage = 0.f;
 
 			{ //compute coverage
diff --git a/servers/rendering/rendering_device.cpp b/servers/rendering/rendering_device.cpp
index 1fdccb929b..2b6644e893 100644
--- a/servers/rendering/rendering_device.cpp
+++ b/servers/rendering/rendering_device.cpp
@@ -5224,8 +5224,12 @@ uint64_t RenderingDevice::get_driver_resource(DriverResource p_resource, RID p_r
 		case DRIVER_RESOURCE_LOGICAL_DEVICE:
 		case DRIVER_RESOURCE_PHYSICAL_DEVICE:
 		case DRIVER_RESOURCE_TOPMOST_OBJECT:
+			break;
 		case DRIVER_RESOURCE_COMMAND_QUEUE:
+			driver_id = main_queue.id;
+			break;
 		case DRIVER_RESOURCE_QUEUE_FAMILY:
+			driver_id = main_queue_family.id;
 			break;
 		case DRIVER_RESOURCE_TEXTURE:
 		case DRIVER_RESOURCE_TEXTURE_VIEW:
@@ -5233,19 +5237,19 @@ uint64_t RenderingDevice::get_driver_resource(DriverResource p_resource, RID p_r
 			Texture *tex = texture_owner.get_or_null(p_rid);
 			ERR_FAIL_NULL_V(tex, 0);
 
-			driver_id = tex->driver_id;
+			driver_id = tex->driver_id.id;
 		} break;
 		case DRIVER_RESOURCE_SAMPLER: {
 			RDD::SamplerID *sampler_driver_id = sampler_owner.get_or_null(p_rid);
 			ERR_FAIL_NULL_V(sampler_driver_id, 0);
 
-			driver_id = *sampler_driver_id;
+			driver_id = (*sampler_driver_id).id;
 		} break;
 		case DRIVER_RESOURCE_UNIFORM_SET: {
 			UniformSet *uniform_set = uniform_set_owner.get_or_null(p_rid);
 			ERR_FAIL_NULL_V(uniform_set, 0);
 
-			driver_id = uniform_set->driver_id;
+			driver_id = uniform_set->driver_id.id;
 		} break;
 		case DRIVER_RESOURCE_BUFFER: {
 			Buffer *buffer = nullptr;
@@ -5262,19 +5266,19 @@ uint64_t RenderingDevice::get_driver_resource(DriverResource p_resource, RID p_r
 			}
 			ERR_FAIL_NULL_V(buffer, 0);
 
-			driver_id = buffer->driver_id;
+			driver_id = buffer->driver_id.id;
 		} break;
 		case DRIVER_RESOURCE_COMPUTE_PIPELINE: {
 			ComputePipeline *compute_pipeline = compute_pipeline_owner.get_or_null(p_rid);
 			ERR_FAIL_NULL_V(compute_pipeline, 0);
 
-			driver_id = compute_pipeline->driver_id;
+			driver_id = compute_pipeline->driver_id.id;
 		} break;
 		case DRIVER_RESOURCE_RENDER_PIPELINE: {
 			RenderPipeline *render_pipeline = render_pipeline_owner.get_or_null(p_rid);
 			ERR_FAIL_NULL_V(render_pipeline, 0);
 
-			driver_id = render_pipeline->driver_id;
+			driver_id = render_pipeline->driver_id.id;
 		} break;
 		default: {
 			ERR_FAIL_V(0);
@@ -5550,6 +5554,7 @@ void RenderingDevice::_bind_methods() {
 	ClassDB::bind_method(D_METHOD("compute_list_set_push_constant", "compute_list", "buffer", "size_bytes"), &RenderingDevice::_compute_list_set_push_constant);
 	ClassDB::bind_method(D_METHOD("compute_list_bind_uniform_set", "compute_list", "uniform_set", "set_index"), &RenderingDevice::compute_list_bind_uniform_set);
 	ClassDB::bind_method(D_METHOD("compute_list_dispatch", "compute_list", "x_groups", "y_groups", "z_groups"), &RenderingDevice::compute_list_dispatch);
+	ClassDB::bind_method(D_METHOD("compute_list_dispatch_indirect", "compute_list", "buffer", "offset"), &RenderingDevice::compute_list_dispatch_indirect);
 	ClassDB::bind_method(D_METHOD("compute_list_add_barrier", "compute_list"), &RenderingDevice::compute_list_add_barrier);
 	ClassDB::bind_method(D_METHOD("compute_list_end"), &RenderingDevice::compute_list_end);
 
diff --git a/servers/rendering/rendering_device_driver.h b/servers/rendering/rendering_device_driver.h
index 09a0412941..e9464ba321 100644
--- a/servers/rendering/rendering_device_driver.h
+++ b/servers/rendering/rendering_device_driver.h
@@ -128,7 +128,7 @@ public:
 
 #define DEFINE_ID(m_name)                                                                             \
 	struct m_name##ID : public ID {                                                                   \
-		_ALWAYS_INLINE_ operator bool() const { return id != 0; }                                     \
+		_ALWAYS_INLINE_ explicit operator bool() const { return id != 0; }                            \
 		_ALWAYS_INLINE_ m_name##ID &operator=(m_name##ID p_other) {                                   \
 			id = p_other.id;                                                                          \
 			return *this;                                                                             \
diff --git a/servers/rendering/rendering_device_graph.cpp b/servers/rendering/rendering_device_graph.cpp
index 4b85d1c2bf..adac7ee3eb 100644
--- a/servers/rendering/rendering_device_graph.cpp
+++ b/servers/rendering/rendering_device_graph.cpp
@@ -261,7 +261,7 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 			}
 
 			if (resource_tracker->parent->usage == RESOURCE_USAGE_NONE) {
-				if (resource_tracker->parent->texture_driver_id != 0) {
+				if (resource_tracker->parent->texture_driver_id.id != 0) {
 					// If the resource is a texture, we transition it entirely to the layout determined by the first slice that uses it.
 					_add_texture_barrier_to_command(resource_tracker->parent->texture_driver_id, RDD::BarrierAccessBits(0), new_usage_access, RDG::RESOURCE_USAGE_NONE, new_resource_usage, resource_tracker->parent->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count);
 				}
@@ -324,7 +324,7 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 								ERR_FAIL_MSG("Texture slices that overlap can't be used in the same command.");
 							} else {
 								// Delete the slice from the dirty list and revert it to the usage of the parent.
-								if (current_tracker->texture_driver_id != 0) {
+								if (current_tracker->texture_driver_id.id != 0) {
 									_add_texture_barrier_to_command(current_tracker->texture_driver_id, current_tracker->usage_access, new_usage_access, current_tracker->usage, resource_tracker->parent->usage, current_tracker->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count);
 
 									// Merge the area of the slice with the current tracking area of the command and indicate it's a write usage as well.
@@ -383,7 +383,7 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
 			while (current_tracker != nullptr) {
 				current_tracker->reset_if_outdated(tracking_frame);
 
-				if (current_tracker->texture_driver_id != 0) {
+				if (current_tracker->texture_driver_id.id != 0) {
 					// Transition all slices to the layout of the parent resource.
 					_add_texture_barrier_to_command(current_tracker->texture_driver_id, current_tracker->usage_access, new_usage_access, current_tracker->usage, resource_tracker->usage, current_tracker->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count);
 				}
diff --git a/servers/rendering/shader_language.cpp b/servers/rendering/shader_language.cpp
index fef1a205d6..8fbad346a4 100644
--- a/servers/rendering/shader_language.cpp
+++ b/servers/rendering/shader_language.cpp
@@ -8361,7 +8361,7 @@ Error ShaderLanguage::_parse_shader(const HashMap<StringName, FunctionInfo> &p_f
 						}
 					}
 #endif // DEBUG_ENABLED
-					if (String(shader_type_identifier) != "spatial") {
+					if (shader_type_identifier != StringName() && String(shader_type_identifier) != "spatial") {
 						_set_error(vformat(RTR("Uniform instances are not yet implemented for '%s' shaders."), shader_type_identifier));
 						return ERR_PARSE_ERROR;
 					}
@@ -8848,7 +8848,7 @@ Error ShaderLanguage::_parse_shader(const HashMap<StringName, FunctionInfo> &p_f
 										_set_error(RTR("'hint_normal_roughness_texture' is only available when using the Forward+ backend."));
 										return ERR_PARSE_ERROR;
 									}
-									if (String(shader_type_identifier) != "spatial") {
+									if (shader_type_identifier != StringName() && String(shader_type_identifier) != "spatial") {
 										_set_error(vformat(RTR("'hint_normal_roughness_texture' is not supported in '%s' shaders."), shader_type_identifier));
 										return ERR_PARSE_ERROR;
 									}
@@ -8857,7 +8857,7 @@ Error ShaderLanguage::_parse_shader(const HashMap<StringName, FunctionInfo> &p_f
 									new_hint = ShaderNode::Uniform::HINT_DEPTH_TEXTURE;
 									--texture_uniforms;
 									--texture_binding;
-									if (String(shader_type_identifier) != "spatial") {
+									if (shader_type_identifier != StringName() && String(shader_type_identifier) != "spatial") {
 										_set_error(vformat(RTR("'hint_depth_texture' is not supported in '%s' shaders."), shader_type_identifier));
 										return ERR_PARSE_ERROR;
 									}
diff --git a/servers/rendering/storage/light_storage.h b/servers/rendering/storage/light_storage.h
index d439598f3d..6a0adfa596 100644
--- a/servers/rendering/storage/light_storage.h
+++ b/servers/rendering/storage/light_storage.h
@@ -98,6 +98,7 @@ public:
 	virtual bool light_instances_can_render_shadow_cube() const {
 		return true;
 	}
+	virtual bool light_instance_is_shadow_visible_at_position(RID p_light, const Vector3 &p_position) const = 0;
 
 	/* PROBE API */
 
diff --git a/tests/core/io/test_ip.h b/tests/core/io/test_ip.h
new file mode 100644
index 0000000000..7b5583faa0
--- /dev/null
+++ b/tests/core/io/test_ip.h
@@ -0,0 +1,51 @@
+/**************************************************************************/
+/*  test_ip.h                                                             */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifndef TEST_IP_H
+#define TEST_IP_H
+
+#include "core/io/ip.h"
+
+#include "tests/test_macros.h"
+
+namespace TestIP {
+
+TEST_CASE("[IP] resolve_hostname") {
+	for (int x = 0; x < 1000; x++) {
+		IPAddress IPV4 = IP::get_singleton()->resolve_hostname("localhost", IP::TYPE_IPV4);
+		CHECK("127.0.0.1" == String(IPV4));
+		IPAddress IPV6 = IP::get_singleton()->resolve_hostname("localhost", IP::TYPE_IPV6);
+		CHECK("0:0:0:0:0:0:0:1" == String(IPV6));
+	}
+}
+
+} // namespace TestIP
+
+#endif // TEST_IP_H
diff --git a/tests/core/math/test_geometry_2d.h b/tests/core/math/test_geometry_2d.h
index c3ff4f3ec9..50b2575700 100644
--- a/tests/core/math/test_geometry_2d.h
+++ b/tests/core/math/test_geometry_2d.h
@@ -282,41 +282,67 @@ TEST_CASE("[Geometry2D] Closest point to uncapped segment") {
 
 TEST_CASE("[Geometry2D] Closest points between segments") {
 	Vector2 c1, c2;
-	Geometry2D::get_closest_points_between_segments(Vector2(2, 2), Vector2(3, 3), Vector2(4, 4), Vector2(4, 5), c1, c2);
-	CHECK(c1.is_equal_approx(Vector2(3, 3)));
-	CHECK(c2.is_equal_approx(Vector2(4, 4)));
+	// Basis Path Testing suite
+	SUBCASE("[Geometry2D] Both segments degenerate to a point") {
+		Geometry2D::get_closest_points_between_segments(Vector2(0, 0), Vector2(0, 0), Vector2(0, 0), Vector2(0, 0), c1, c2);
+		CHECK(c1.is_equal_approx(Vector2(0, 0)));
+		CHECK(c2.is_equal_approx(Vector2(0, 0)));
+	}
 
-	Geometry2D::get_closest_points_between_segments(Vector2(0, 1), Vector2(-2, -1), Vector2(0, 0), Vector2(2, -2), c1, c2);
-	CHECK(c1.is_equal_approx(Vector2(-0.5, 0.5)));
-	CHECK(c2.is_equal_approx(Vector2(0, 0)));
+	SUBCASE("[Geometry2D] Closest point on second segment trajectory is above [0,1]") {
+		Geometry2D::get_closest_points_between_segments(Vector2(50, -25), Vector2(50, -10), Vector2(-50, 10), Vector2(-40, 10), c1, c2);
+		CHECK(c1.is_equal_approx(Vector2(50, -10)));
+		CHECK(c2.is_equal_approx(Vector2(-40, 10)));
+	}
 
-	Geometry2D::get_closest_points_between_segments(Vector2(-1, 1), Vector2(1, -1), Vector2(1, 1), Vector2(-1, -1), c1, c2);
-	CHECK(c1.is_equal_approx(Vector2(0, 0)));
-	CHECK(c2.is_equal_approx(Vector2(0, 0)));
+	SUBCASE("[Geometry2D] Parallel segments") {
+		Geometry2D::get_closest_points_between_segments(Vector2(2, 1), Vector2(4, 3), Vector2(2, 3), Vector2(4, 5), c1, c2);
+		CHECK(c1.is_equal_approx(Vector2(3, 2)));
+		CHECK(c2.is_equal_approx(Vector2(2, 3)));
+	}
 
-	Geometry2D::get_closest_points_between_segments(Vector2(-3, 4), Vector2(-3, 4), Vector2(-4, 3), Vector2(-2, 3), c1, c2);
-	CHECK_MESSAGE(
-			c1.is_equal_approx(Vector2(-3, 4)),
-			"1st line segment is only a point, this point should be the closest point to the 2nd line segment.");
-	CHECK_MESSAGE(
-			c2.is_equal_approx(Vector2(-3, 3)),
-			"1st line segment is only a point, this should not matter when determining the closest point on the 2nd line segment.");
+	SUBCASE("[Geometry2D] Closest point on second segment trajectory is within [0,1]") {
+		Geometry2D::get_closest_points_between_segments(Vector2(2, 4), Vector2(2, 3), Vector2(1, 1), Vector2(4, 4), c1, c2);
+		CHECK(c1.is_equal_approx(Vector2(2, 3)));
+		CHECK(c2.is_equal_approx(Vector2(2.5, 2.5)));
+	}
 
-	Geometry2D::get_closest_points_between_segments(Vector2(-4, 3), Vector2(-2, 3), Vector2(-3, 4), Vector2(-3, 4), c1, c2);
-	CHECK_MESSAGE(
-			c1.is_equal_approx(Vector2(-3, 3)),
-			"2nd line segment is only a point, this should not matter when determining the closest point on the 1st line segment.");
-	CHECK_MESSAGE(
-			c2.is_equal_approx(Vector2(-3, 4)),
-			"2nd line segment is only a point, this point should be the closest point to the 1st line segment.");
+	SUBCASE("[Geometry2D] Closest point on second segment trajectory is below [0,1]") {
+		Geometry2D::get_closest_points_between_segments(Vector2(-20, -20), Vector2(-10, -40), Vector2(10, 25), Vector2(25, 40), c1, c2);
+		CHECK(c1.is_equal_approx(Vector2(-20, -20)));
+		CHECK(c2.is_equal_approx(Vector2(10, 25)));
+	}
 
-	Geometry2D::get_closest_points_between_segments(Vector2(5, -4), Vector2(5, -4), Vector2(-2, 1), Vector2(-2, 1), c1, c2);
-	CHECK_MESSAGE(
-			c1.is_equal_approx(Vector2(5, -4)),
-			"Both line segments are only a point. On the 1st line segment, that point should be the closest point to the 2nd line segment.");
-	CHECK_MESSAGE(
-			c2.is_equal_approx(Vector2(-2, 1)),
-			"Both line segments are only a point. On the 2nd line segment, that point should be the closest point to the 1st line segment.");
+	SUBCASE("[Geometry2D] Second segment degenerates to a point") {
+		Geometry2D::get_closest_points_between_segments(Vector2(1, 2), Vector2(2, 1), Vector2(3, 3), Vector2(3, 3), c1, c2);
+		CHECK(c1.is_equal_approx(Vector2(1.5, 1.5)));
+		CHECK(c2.is_equal_approx(Vector2(3, 3)));
+	}
+
+	SUBCASE("[Geometry2D] First segment degenerates to a point") {
+		Geometry2D::get_closest_points_between_segments(Vector2(1, 1), Vector2(1, 1), Vector2(2, 2), Vector2(4, 4), c1, c2);
+		CHECK(c1.is_equal_approx(Vector2(1, 1)));
+		CHECK(c2.is_equal_approx(Vector2(2, 2)));
+	}
+	// End Basis Path Testing suite
+
+	SUBCASE("[Geometry2D] Segments are equal vectors") {
+		Geometry2D::get_closest_points_between_segments(Vector2(2, 2), Vector2(3, 3), Vector2(4, 4), Vector2(4, 5), c1, c2);
+		CHECK(c1.is_equal_approx(Vector2(3, 3)));
+		CHECK(c2.is_equal_approx(Vector2(4, 4)));
+	}
+
+	SUBCASE("[Geometry2D] Standard case") {
+		Geometry2D::get_closest_points_between_segments(Vector2(0, 1), Vector2(-2, -1), Vector2(0, 0), Vector2(2, -2), c1, c2);
+		CHECK(c1.is_equal_approx(Vector2(-0.5, 0.5)));
+		CHECK(c2.is_equal_approx(Vector2(0, 0)));
+	}
+
+	SUBCASE("[Geometry2D] Segments intersect") {
+		Geometry2D::get_closest_points_between_segments(Vector2(-1, 1), Vector2(1, -1), Vector2(1, 1), Vector2(-1, -1), c1, c2);
+		CHECK(c1.is_equal_approx(Vector2(0, 0)));
+		CHECK(c2.is_equal_approx(Vector2(0, 0)));
+	}
 }
 
 TEST_CASE("[Geometry2D] Make atlas") {
diff --git a/tests/core/templates/test_oa_hash_map.h b/tests/core/templates/test_oa_hash_map.h
new file mode 100644
index 0000000000..9359efa964
--- /dev/null
+++ b/tests/core/templates/test_oa_hash_map.h
@@ -0,0 +1,225 @@
+/**************************************************************************/
+/*  test_oa_hash_map.h                                                    */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifndef TEST_OA_HASH_MAP_H
+#define TEST_OA_HASH_MAP_H
+
+#include "core/templates/oa_hash_map.h"
+#include "scene/resources/texture.h"
+
+#include "tests/test_macros.h"
+
+namespace TestOAHashMap {
+
+TEST_CASE("[OAHashMap] Insert element") {
+	OAHashMap<int, int> map;
+	map.insert(42, 84);
+	int data = 0;
+	bool lookup_res = map.lookup(42, data);
+	int value = *map.lookup_ptr(42);
+	CHECK(lookup_res);
+	CHECK(value == 84);
+	CHECK(data == 84);
+}
+
+TEST_CASE("[OAHashMap] Set element") {
+	OAHashMap<int, int> map;
+	map.set(42, 84);
+	int data = 0;
+	bool lookup_res = map.lookup(42, data);
+	int value = *map.lookup_ptr(42);
+	CHECK(lookup_res);
+	CHECK(value == 84);
+	CHECK(data == 84);
+}
+
+TEST_CASE("[OAHashMap] Overwrite element") {
+	OAHashMap<int, int> map;
+	map.set(42, 84);
+	map.set(42, 1234);
+	int result = *map.lookup_ptr(42);
+	CHECK(result == 1234);
+}
+
+TEST_CASE("[OAHashMap] Remove element") {
+	OAHashMap<int, int> map;
+	map.insert(42, 84);
+	map.remove(42);
+	CHECK(!map.has(42));
+}
+
+TEST_CASE("[OAHashMap] Get Num_Elements") {
+	OAHashMap<int, int> map;
+	map.set(42, 84);
+	map.set(123, 84);
+	map.set(123, 84);
+	map.set(0, 84);
+	map.set(123485, 84);
+
+	CHECK(map.get_num_elements() == 4);
+}
+
+TEST_CASE("[OAHashMap] Iteration") {
+	OAHashMap<int, int> map;
+	map.insert(42, 84);
+	map.insert(123, 12385);
+	map.insert(0, 12934);
+	map.insert(123485, 1238888);
+	map.set(123, 111111);
+
+	Vector<Pair<int, int>> expected;
+	expected.push_back(Pair<int, int>(42, 84));
+	expected.push_back(Pair<int, int>(123, 111111));
+	expected.push_back(Pair<int, int>(0, 12934));
+	expected.push_back(Pair<int, int>(123485, 1238888));
+
+	for (OAHashMap<int, int>::Iterator it = map.iter(); it.valid; it = map.next_iter(it)) {
+		int64_t result = expected.find(Pair<int, int>(*it.key, *it.value));
+		CHECK(result >= 0);
+	}
+}
+
+TEST_CASE("[OAHashMap] Insert, iterate, remove many strings") {
+	uint64_t pre_mem = Memory::get_mem_usage();
+	{
+		const int elem_max = 40;
+		OAHashMap<String, int> map;
+		for (int i = 0; i < elem_max; i++) {
+			map.insert(itos(i), i);
+		}
+
+		Vector<String> elems_still_valid;
+
+		for (int i = 0; i < elem_max; i++) {
+			if ((i % 5) == 0) {
+				map.remove(itos(i));
+			} else {
+				elems_still_valid.push_back(itos(i));
+			}
+		}
+
+		CHECK(elems_still_valid.size() == map.get_num_elements());
+
+		for (int i = 0; i < elems_still_valid.size(); i++) {
+			CHECK(map.has(elems_still_valid[i]));
+		}
+	}
+
+	CHECK(Memory::get_mem_usage() == pre_mem);
+}
+
+TEST_CASE("[OAHashMap] Clear") {
+	OAHashMap<int, int> map;
+	map.insert(42, 84);
+	map.insert(0, 1234);
+	map.clear();
+	CHECK(!map.has(42));
+	CHECK(!map.has(0));
+	CHECK(map.is_empty());
+}
+
+TEST_CASE("[OAHashMap] Copy constructor") {
+	uint64_t pre_mem = Memory::get_mem_usage();
+	{
+		OAHashMap<int, int> map0;
+		const uint32_t count = 5;
+		for (uint32_t i = 0; i < count; i++) {
+			map0.insert(i, i);
+		}
+		OAHashMap<int, int> map1(map0);
+		CHECK(map0.get_num_elements() == map1.get_num_elements());
+		CHECK(map0.get_capacity() == map1.get_capacity());
+		CHECK(*map0.lookup_ptr(0) == *map1.lookup_ptr(0));
+	}
+	CHECK(Memory::get_mem_usage() == pre_mem);
+}
+
+TEST_CASE("[OAHashMap] Operator =") {
+	uint64_t pre_mem = Memory::get_mem_usage();
+	{
+		OAHashMap<int, int> map0;
+		OAHashMap<int, int> map1;
+		const uint32_t count = 5;
+		map1.insert(1234, 1234);
+		for (uint32_t i = 0; i < count; i++) {
+			map0.insert(i, i);
+		}
+		map1 = map0;
+		CHECK(map0.get_num_elements() == map1.get_num_elements());
+		CHECK(map0.get_capacity() == map1.get_capacity());
+		CHECK(*map0.lookup_ptr(0) == *map1.lookup_ptr(0));
+	}
+	CHECK(Memory::get_mem_usage() == pre_mem);
+}
+
+TEST_CASE("[OAHashMap] Non-trivial types") {
+	uint64_t pre_mem = Memory::get_mem_usage();
+	{
+		OAHashMap<String, Ref<Texture2D>> map1;
+		const uint32_t count = 10;
+		for (uint32_t i = 0; i < count; i++) {
+			String string = "qwerty";
+			string += itos(i);
+			Ref<Texture2D> ref_texture_2d;
+
+			map1.set(string, ref_texture_2d);
+			Ref<Texture2D> map_vec = *map1.lookup_ptr(string);
+			CHECK(map_vec == ref_texture_2d);
+		}
+		OAHashMap<String, Ref<Texture2D>> map1copy(map1);
+		CHECK(map1copy.has(String("qwerty0")));
+		map1copy = map1;
+		CHECK(map1copy.has(String("qwerty2")));
+
+		OAHashMap<int64_t, Vector4 *> map2;
+
+		for (uint32_t i = 0; i < count; i++) {
+			Vector4 *vec = memnew(Vector4);
+			vec->x = 10;
+			vec->y = 12;
+			vec->z = 151;
+			vec->w = -13;
+			map2.set(i, vec);
+			Vector4 *p = nullptr;
+			map2.lookup(i, p);
+			CHECK(*p == *vec);
+		}
+
+		OAHashMap<int64_t, Vector4 *> map3(map2);
+		for (OAHashMap<int64_t, Vector4 *>::Iterator it = map2.iter(); it.valid; it = map2.next_iter(it)) {
+			memdelete(*(it.value));
+		}
+	}
+	CHECK(Memory::get_mem_usage() == pre_mem);
+}
+
+} // namespace TestOAHashMap
+
+#endif // TEST_OA_HASH_MAP_H
diff --git a/tests/core/variant/test_array.h b/tests/core/variant/test_array.h
index ea61ae2a02..287345e831 100644
--- a/tests/core/variant/test_array.h
+++ b/tests/core/variant/test_array.h
@@ -545,6 +545,54 @@ TEST_CASE("[Array] Recursive self comparison") {
 	a2.clear();
 }
 
+TEST_CASE("[Array] Iteration") {
+	Array a1 = build_array(1, 2, 3);
+	Array a2 = build_array(1, 2, 3);
+
+	int idx = 0;
+	for (Variant &E : a1) {
+		CHECK_EQ(int(a2[idx]), int(E));
+		idx++;
+	}
+
+	idx = 0;
+
+	for (const Variant &E : (const Array &)a1) {
+		CHECK_EQ(int(a2[idx]), int(E));
+		idx++;
+	}
+
+	a1.clear();
+}
+
+TEST_CASE("[Array] Iteration and modification") {
+	Array a1 = build_array(1, 2, 3);
+	Array a2 = build_array(2, 3, 4);
+	Array a3 = build_array(1, 2, 3);
+	Array a4 = build_array(1, 2, 3);
+	a3.make_read_only();
+
+	int idx = 0;
+	for (Variant &E : a1) {
+		E = a2[idx];
+		idx++;
+	}
+
+	CHECK_EQ(a1, a2);
+
+	// Ensure read-only is respected.
+	idx = 0;
+	for (Variant &E : a3) {
+		E = a2[idx];
+	}
+
+	CHECK_EQ(a3, a4);
+
+	a1.clear();
+	a2.clear();
+	a4.clear();
+}
+
 } // namespace TestArray
 
 #endif // TEST_ARRAY_H
diff --git a/tests/scene/test_camera_2d.h b/tests/scene/test_camera_2d.h
new file mode 100644
index 0000000000..f03a4aed53
--- /dev/null
+++ b/tests/scene/test_camera_2d.h
@@ -0,0 +1,318 @@
+/**************************************************************************/
+/*  test_camera_2d.h                                                      */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifndef TEST_CAMERA_2D_H
+#define TEST_CAMERA_2D_H
+
+#include "scene/2d/camera_2d.h"
+#include "scene/main/viewport.h"
+#include "scene/main/window.h"
+#include "tests/test_macros.h"
+
+namespace TestCamera2D {
+
+TEST_CASE("[SceneTree][Camera2D] Getters and setters") {
+	Camera2D *test_camera = memnew(Camera2D);
+
+	SUBCASE("AnchorMode") {
+		test_camera->set_anchor_mode(Camera2D::AnchorMode::ANCHOR_MODE_FIXED_TOP_LEFT);
+		CHECK(test_camera->get_anchor_mode() == Camera2D::AnchorMode::ANCHOR_MODE_FIXED_TOP_LEFT);
+		test_camera->set_anchor_mode(Camera2D::AnchorMode::ANCHOR_MODE_DRAG_CENTER);
+		CHECK(test_camera->get_anchor_mode() == Camera2D::AnchorMode::ANCHOR_MODE_DRAG_CENTER);
+	}
+
+	SUBCASE("ProcessCallback") {
+		test_camera->set_process_callback(Camera2D::Camera2DProcessCallback::CAMERA2D_PROCESS_PHYSICS);
+		CHECK(test_camera->get_process_callback() == Camera2D::Camera2DProcessCallback::CAMERA2D_PROCESS_PHYSICS);
+		test_camera->set_process_callback(Camera2D::Camera2DProcessCallback::CAMERA2D_PROCESS_IDLE);
+		CHECK(test_camera->get_process_callback() == Camera2D::Camera2DProcessCallback::CAMERA2D_PROCESS_IDLE);
+	}
+
+	SUBCASE("Drag") {
+		constexpr float drag_left_margin = 0.8f;
+		constexpr float drag_top_margin = 0.8f;
+		constexpr float drag_right_margin = 0.8f;
+		constexpr float drag_bottom_margin = 0.8f;
+		constexpr float drag_horizontal_offset1 = 0.5f;
+		constexpr float drag_horizontal_offset2 = -0.5f;
+		constexpr float drag_vertical_offset1 = 0.5f;
+		constexpr float drag_vertical_offset2 = -0.5f;
+		test_camera->set_drag_margin(SIDE_LEFT, drag_left_margin);
+		CHECK(test_camera->get_drag_margin(SIDE_LEFT) == drag_left_margin);
+		test_camera->set_drag_margin(SIDE_TOP, drag_top_margin);
+		CHECK(test_camera->get_drag_margin(SIDE_TOP) == drag_top_margin);
+		test_camera->set_drag_margin(SIDE_RIGHT, drag_right_margin);
+		CHECK(test_camera->get_drag_margin(SIDE_RIGHT) == drag_right_margin);
+		test_camera->set_drag_margin(SIDE_BOTTOM, drag_bottom_margin);
+		CHECK(test_camera->get_drag_margin(SIDE_BOTTOM) == drag_bottom_margin);
+		test_camera->set_drag_horizontal_enabled(true);
+		CHECK(test_camera->is_drag_horizontal_enabled());
+		test_camera->set_drag_horizontal_enabled(false);
+		CHECK_FALSE(test_camera->is_drag_horizontal_enabled());
+		test_camera->set_drag_horizontal_offset(drag_horizontal_offset1);
+		CHECK(test_camera->get_drag_horizontal_offset() == drag_horizontal_offset1);
+		test_camera->set_drag_horizontal_offset(drag_horizontal_offset2);
+		CHECK(test_camera->get_drag_horizontal_offset() == drag_horizontal_offset2);
+		test_camera->set_drag_vertical_enabled(true);
+		CHECK(test_camera->is_drag_vertical_enabled());
+		test_camera->set_drag_vertical_enabled(false);
+		CHECK_FALSE(test_camera->is_drag_vertical_enabled());
+		test_camera->set_drag_vertical_offset(drag_vertical_offset1);
+		CHECK(test_camera->get_drag_vertical_offset() == drag_vertical_offset1);
+		test_camera->set_drag_vertical_offset(drag_vertical_offset2);
+		CHECK(test_camera->get_drag_vertical_offset() == drag_vertical_offset2);
+	}
+
+	SUBCASE("Drawing") {
+		test_camera->set_margin_drawing_enabled(true);
+		CHECK(test_camera->is_margin_drawing_enabled());
+		test_camera->set_margin_drawing_enabled(false);
+		CHECK_FALSE(test_camera->is_margin_drawing_enabled());
+		test_camera->set_limit_drawing_enabled(true);
+		CHECK(test_camera->is_limit_drawing_enabled());
+		test_camera->set_limit_drawing_enabled(false);
+		CHECK_FALSE(test_camera->is_limit_drawing_enabled());
+		test_camera->set_screen_drawing_enabled(true);
+		CHECK(test_camera->is_screen_drawing_enabled());
+		test_camera->set_screen_drawing_enabled(false);
+		CHECK_FALSE(test_camera->is_screen_drawing_enabled());
+	}
+
+	SUBCASE("Enabled") {
+		test_camera->set_enabled(true);
+		CHECK(test_camera->is_enabled());
+		test_camera->set_enabled(false);
+		CHECK_FALSE(test_camera->is_enabled());
+	}
+
+	SUBCASE("Rotation") {
+		constexpr float rotation_smoothing_speed = 20.0f;
+		test_camera->set_ignore_rotation(true);
+		CHECK(test_camera->is_ignoring_rotation());
+		test_camera->set_ignore_rotation(false);
+		CHECK_FALSE(test_camera->is_ignoring_rotation());
+		test_camera->set_rotation_smoothing_enabled(true);
+		CHECK(test_camera->is_rotation_smoothing_enabled());
+		test_camera->set_rotation_smoothing_speed(rotation_smoothing_speed);
+		CHECK(test_camera->get_rotation_smoothing_speed() == rotation_smoothing_speed);
+	}
+
+	SUBCASE("Zoom") {
+		const Vector2 zoom = Vector2(4, 4);
+		test_camera->set_zoom(zoom);
+		CHECK(test_camera->get_zoom() == zoom);
+	}
+
+	SUBCASE("Offset") {
+		const Vector2 offset = Vector2(100, 100);
+		test_camera->set_offset(offset);
+		CHECK(test_camera->get_offset() == offset);
+	}
+
+	SUBCASE("Limit") {
+		constexpr int limit_left = 100;
+		constexpr int limit_top = 100;
+		constexpr int limit_right = 100;
+		constexpr int limit_bottom = 100;
+		test_camera->set_limit_smoothing_enabled(true);
+		CHECK(test_camera->is_limit_smoothing_enabled());
+		test_camera->set_limit_smoothing_enabled(false);
+		CHECK_FALSE(test_camera->is_limit_smoothing_enabled());
+		test_camera->set_limit(SIDE_LEFT, limit_left);
+		CHECK(test_camera->get_limit(SIDE_LEFT) == limit_left);
+		test_camera->set_limit(SIDE_TOP, limit_top);
+		CHECK(test_camera->get_limit(SIDE_TOP) == limit_top);
+		test_camera->set_limit(SIDE_RIGHT, limit_right);
+		CHECK(test_camera->get_limit(SIDE_RIGHT) == limit_right);
+		test_camera->set_limit(SIDE_BOTTOM, limit_bottom);
+		CHECK(test_camera->get_limit(SIDE_BOTTOM) == limit_bottom);
+	}
+
+	SUBCASE("Position") {
+		constexpr float smoothing_speed = 20.0f;
+		test_camera->set_position_smoothing_enabled(true);
+		CHECK(test_camera->is_position_smoothing_enabled());
+		test_camera->set_position_smoothing_speed(smoothing_speed);
+		CHECK(test_camera->get_position_smoothing_speed() == smoothing_speed);
+	}
+
+	memdelete(test_camera);
+}
+
+TEST_CASE("[SceneTree][Camera2D] Camera positioning") {
+	SubViewport *mock_viewport = memnew(SubViewport);
+	Camera2D *test_camera = memnew(Camera2D);
+
+	mock_viewport->set_size(Vector2(400, 200));
+	SceneTree::get_singleton()->get_root()->add_child(mock_viewport);
+	mock_viewport->add_child(test_camera);
+
+	SUBCASE("Anchor mode") {
+		test_camera->set_anchor_mode(Camera2D::ANCHOR_MODE_DRAG_CENTER);
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(0, 0)));
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(0, 0)));
+
+		test_camera->set_anchor_mode(Camera2D::ANCHOR_MODE_FIXED_TOP_LEFT);
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(200, 100)));
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(0, 0)));
+	}
+
+	SUBCASE("Offset") {
+		test_camera->set_offset(Vector2(100, 100));
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(100, 100)));
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(0, 0)));
+
+		test_camera->set_offset(Vector2(-100, 300));
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(-100, 300)));
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(0, 0)));
+
+		test_camera->set_offset(Vector2(0, 0));
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(0, 0)));
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(0, 0)));
+	}
+
+	SUBCASE("Limits") {
+		test_camera->set_limit(SIDE_LEFT, 100);
+		test_camera->set_limit(SIDE_TOP, 50);
+
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(300, 150)));
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(0, 0)));
+
+		test_camera->set_limit(SIDE_LEFT, 0);
+		test_camera->set_limit(SIDE_TOP, 0);
+
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(200, 100)));
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(0, 0)));
+	}
+
+	SUBCASE("Drag") {
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(0, 0)));
+
+		// horizontal
+		test_camera->set_drag_horizontal_enabled(true);
+		test_camera->set_drag_margin(SIDE_RIGHT, 0.5);
+
+		test_camera->set_position(Vector2(100, 100));
+		test_camera->force_update_scroll();
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(0, 100)));
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(0, 100)));
+		test_camera->set_position(Vector2(101, 101));
+		test_camera->force_update_scroll();
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(1, 101)));
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(1, 101)));
+
+		// test align
+		test_camera->set_position(Vector2(0, 0));
+		test_camera->align();
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(0, 0)));
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(0, 0)));
+
+		// vertical
+		test_camera->set_drag_vertical_enabled(true);
+		test_camera->set_drag_horizontal_enabled(false);
+		test_camera->set_drag_margin(SIDE_TOP, 0.3);
+
+		test_camera->set_position(Vector2(200, -20));
+		test_camera->force_update_scroll();
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(200, 0)));
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(200, 0)));
+		test_camera->set_position(Vector2(250, -55));
+		test_camera->force_update_scroll();
+		CHECK(test_camera->get_camera_position().is_equal_approx(Vector2(250, -25)));
+		CHECK(test_camera->get_camera_screen_center().is_equal_approx(Vector2(250, -25)));
+	}
+
+	memdelete(test_camera);
+	memdelete(mock_viewport);
+}
+
+TEST_CASE("[SceneTree][Camera2D] Transforms") {
+	SubViewport *mock_viewport = memnew(SubViewport);
+	Camera2D *test_camera = memnew(Camera2D);
+
+	mock_viewport->set_size(Vector2(400, 200));
+	SceneTree::get_singleton()->get_root()->add_child(mock_viewport);
+	mock_viewport->add_child(test_camera);
+
+	SUBCASE("Default camera") {
+		Transform2D xform = mock_viewport->get_canvas_transform();
+		// x,y are basis vectors, origin = screen center
+		Transform2D test_xform = Transform2D(Vector2(1, 0), Vector2(0, 1), Vector2(200, 100));
+		CHECK(xform.is_equal_approx(test_xform));
+	}
+
+	SUBCASE("Zoom") {
+		test_camera->set_zoom(Vector2(0.5, 2));
+		Transform2D xform = mock_viewport->get_canvas_transform();
+		Transform2D test_xform = Transform2D(Vector2(0.5, 0), Vector2(0, 2), Vector2(200, 100));
+		CHECK(xform.is_equal_approx(test_xform));
+
+		test_camera->set_zoom(Vector2(10, 10));
+		xform = mock_viewport->get_canvas_transform();
+		test_xform = Transform2D(Vector2(10, 0), Vector2(0, 10), Vector2(200, 100));
+		CHECK(xform.is_equal_approx(test_xform));
+
+		test_camera->set_zoom(Vector2(1, 1));
+		xform = mock_viewport->get_canvas_transform();
+		test_xform = Transform2D(Vector2(1, 0), Vector2(0, 1), Vector2(200, 100));
+		CHECK(xform.is_equal_approx(test_xform));
+	}
+
+	SUBCASE("Rotation") {
+		test_camera->set_rotation(Math_PI / 2);
+		Transform2D xform = mock_viewport->get_canvas_transform();
+		Transform2D test_xform = Transform2D(Vector2(1, 0), Vector2(0, 1), Vector2(200, 100));
+		CHECK(xform.is_equal_approx(test_xform));
+
+		test_camera->set_ignore_rotation(false);
+		xform = mock_viewport->get_canvas_transform();
+		test_xform = Transform2D(Vector2(0, -1), Vector2(1, 0), Vector2(200, 100));
+		CHECK(xform.is_equal_approx(test_xform));
+
+		test_camera->set_rotation(-1 * Math_PI);
+		test_camera->force_update_scroll();
+		xform = mock_viewport->get_canvas_transform();
+		test_xform = Transform2D(Vector2(-1, 0), Vector2(0, -1), Vector2(200, 100));
+		CHECK(xform.is_equal_approx(test_xform));
+
+		test_camera->set_rotation(0);
+		test_camera->force_update_scroll();
+		xform = mock_viewport->get_canvas_transform();
+		test_xform = Transform2D(Vector2(1, 0), Vector2(0, 1), Vector2(200, 100));
+		CHECK(xform.is_equal_approx(test_xform));
+	}
+
+	memdelete(test_camera);
+	memdelete(mock_viewport);
+}
+
+} // namespace TestCamera2D
+
+#endif // TEST_CAMERA_2D_H
diff --git a/tests/test_main.cpp b/tests/test_main.cpp
index 24eb84127b..bb6837c965 100644
--- a/tests/test_main.cpp
+++ b/tests/test_main.cpp
@@ -44,6 +44,7 @@
 #include "tests/core/io/test_file_access.h"
 #include "tests/core/io/test_http_client.h"
 #include "tests/core/io/test_image.h"
+#include "tests/core/io/test_ip.h"
 #include "tests/core/io/test_json.h"
 #include "tests/core/io/test_marshalls.h"
 #include "tests/core/io/test_pck_packer.h"
@@ -85,6 +86,7 @@
 #include "tests/core/templates/test_list.h"
 #include "tests/core/templates/test_local_vector.h"
 #include "tests/core/templates/test_lru.h"
+#include "tests/core/templates/test_oa_hash_map.h"
 #include "tests/core/templates/test_paged_array.h"
 #include "tests/core/templates/test_rid.h"
 #include "tests/core/templates/test_vector.h"
@@ -101,6 +103,7 @@
 #include "tests/scene/test_arraymesh.h"
 #include "tests/scene/test_audio_stream_wav.h"
 #include "tests/scene/test_bit_map.h"
+#include "tests/scene/test_camera_2d.h"
 #include "tests/scene/test_code_edit.h"
 #include "tests/scene/test_color_picker.h"
 #include "tests/scene/test_control.h"
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 8fabe8a893..5ecbffac13 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -318,7 +318,7 @@ Files extracted from upstream source:
 ## glad
 
 - Upstream: https://github.com/Dav1dde/glad
-- Version: 2.0.4 (d08b1aa01f8fe57498f04d47b5fa8c48725be877, 2023)
+- Version: 2.0.6 (658f48e72aee3c6582e80b05ac0f8787a64fe6bb, 2024)
 - License: CC0 1.0 and Apache 2.0
 
 Files extracted from upstream source:
@@ -334,11 +334,11 @@ Files generated from [upstream web instance](https://gen.glad.sh/):
 - `glx.c`
 - `glad/glx.h`
 
-See the permalinks in `glad/gl.h` and `glad/glx.h` to regenrate the files with
-a new version of the web instance.
+See the permalinks in `glad/egl.h`, `glad/gl.h`, and `glad/glx.h` to regenrate
+the files with a new version of the web instance.
 
-Some changes have been made in order to allow loading OpenGL and OpenGLES APIs at the same time.
-See the patches in the `patches` directory.
+Some changes have been made in order to allow loading OpenGL and OpenGLES APIs
+at the same time. See the patches in the `patches` directory.
 
 
 ## glslang
@@ -377,16 +377,15 @@ Files extracted from upstream source:
 ## harfbuzz
 
 - Upstream: https://github.com/harfbuzz/harfbuzz
-- Version: 8.3.0 (894a1f72ee93a1fd8dc1d9218cb3fd8f048be29a, 2023)
+- Version: 8.4.0 (63973005bc07aba599b47fdd4cf788647b601ccd, 2024)
 - License: MIT
 
 Files extracted from upstream source:
 
 - `AUTHORS`, `COPYING`, `THANKS`
 - From the `src` folder, recursively:
-  - All the `.c`, `.cc`, `.h`, `.hh` files
-  - Except `main.cc`, `harfbuzz*.cc`, `failing-alloc.c`, `test*.cc`, `hb-wasm*.*`,
-    and the `wasm` folder
+  - All the `.cc`, `.h`, `.hh` files
+  - Except `main.cc`, `harfbuzz*.cc`, `failing-alloc.c`, `test*.cc`, `hb-wasm*.*`
 
 
 ## icu4c
@@ -432,18 +431,18 @@ Files extracted from upstream source:
 ## libktx
 
 - Upstream: https://github.com/KhronosGroup/KTX-Software
-- Version: 4.3.1 (c0214158d551cfc779624b0f84130bcbbefef59a, 2024)
+- Version: 4.3.2 (91ace88675ac59a97e55d0378a6602a9ae6b98bd, 2024)
 - License: Apache-2.0
 
 Files extracted from upstream source:
 
 - `LICENSE.md`
-- `include/*`
+- `include/`
 - `lib/dfdutils/LICENSE.adoc` as `LICENSE.dfdutils.adoc` (in root)
 - `lib/dfdutils/LICENSES/Apache-2.0.txt` as `Apache-2.0.txt` (in root)
-- `lib/dfdutils/{KHR/*,dfd.h,colourspaces.c,createdfd.c,interpretdfd.c,printdfd.c,queries.c,dfd2vk.inl,vk2dfd.*}`
-- `lib/{basis_sgd.h,formatsize.h,gl_format.h,ktxint.h,uthash.h,vk_format.h,vkformat_enum.h,checkheader.c,swap.c,hashlist.c,vkformat_check.c,basis_transcode.cpp,miniz_wrapper.cpp,filestream.*,memstream.*,texture*}`
-- `other_include/KHR/*`
+- `lib/dfdutils/{KHR/,dfd.h,colourspaces.c,createdfd.c,interpretdfd.c,printdfd.c,queries.c,dfd2vk.inl,vk2dfd.*}`
+- `lib/{basis_sgd.h,formatsize.h,gl_format.h,ktxint.h,uthash.h,vk_format.h,vkformat_enum.h,checkheader.c,swap.c,hashlist.c,vkformat_check.c,vkformat_typesize.c,basis_transcode.cpp,miniz_wrapper.cpp,filestream.*,memstream.*,texture*}`
+- `other_include/KHR/`
 - `utils/unused.h`
 
 Some Godot-specific changes are applied via patches included in the `patches` folder.
@@ -521,7 +520,7 @@ in the MSVC debugger.
 ## mbedtls
 
 - Upstream: https://github.com/Mbed-TLS/mbedtls
-- Version: 2.28.7 (555f84735aecdbd76a566cf087ec8425dfb0c8ab, 2024)
+- Version: 2.28.8 (5a764e5555c64337ed17444410269ff21cb617b1, 2024)
 - License: Apache 2.0
 
 File extracted from upstream release tarball:
@@ -592,7 +591,7 @@ to solve some MSVC warnings. See the patches in the `patches` directory.
 ## miniupnpc
 
 - Upstream: https://github.com/miniupnp/miniupnp
-- Version: 2.2.6 (faad29d7300f1bfa9dc7795031993c04c5191f59, 2024)
+- Version: 2.2.7 (d4d5ec7d48c093b37b2ea5d7171ede21ce9d7ff2, 2024)
 - License: BSD-3-Clause
 
 Files extracted from upstream source:
@@ -1036,7 +1035,7 @@ Files extracted from upstream source:
 ## zstd
 
 - Upstream: https://github.com/facebook/zstd
-- Version: 1.5.5 (63779c798237346c2b245c546c40b72a5a5913fe, 2023)
+- Version: 1.5.6 (794ea1b0afca0f020f4e57b6732332231fb23c70, 2024)
 - License: BSD-3-Clause
 
 Files extracted from upstream source:
diff --git a/thirdparty/glad/EGL/eglplatform.h b/thirdparty/glad/EGL/eglplatform.h
index 99362a23de..6786afd90b 100644
--- a/thirdparty/glad/EGL/eglplatform.h
+++ b/thirdparty/glad/EGL/eglplatform.h
@@ -64,6 +64,12 @@ typedef HDC     EGLNativeDisplayType;
 typedef HBITMAP EGLNativePixmapType;
 typedef HWND    EGLNativeWindowType;
 
+#elif defined(__QNX__)
+
+typedef khronos_uintptr_t      EGLNativeDisplayType;
+typedef struct _screen_pixmap* EGLNativePixmapType;  /* screen_pixmap_t */
+typedef struct _screen_window* EGLNativeWindowType;  /* screen_window_t */
+
 #elif defined(__EMSCRIPTEN__)
 
 typedef int EGLNativeDisplayType;
diff --git a/thirdparty/glad/gl.c b/thirdparty/glad/gl.c
index ee0cc188fc..38ecb514bd 100644
--- a/thirdparty/glad/gl.c
+++ b/thirdparty/glad/gl.c
@@ -453,6 +453,7 @@ PFNGLMULTITEXCOORDP3UIPROC glad_glMultiTexCoordP3ui = NULL;
 PFNGLMULTITEXCOORDP3UIVPROC glad_glMultiTexCoordP3uiv = NULL;
 PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui = NULL;
 PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv = NULL;
+PFNGLNAMEDFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC glad_glNamedFramebufferTextureMultiviewOVR = NULL;
 PFNGLNEWLISTPROC glad_glNewList = NULL;
 PFNGLNORMAL3BPROC glad_glNormal3b = NULL;
 PFNGLNORMAL3BVPROC glad_glNormal3bv = NULL;
@@ -2108,40 +2109,29 @@ static void glad_gl_load_GL_EXT_framebuffer_object( GLADuserptrloadfunc load, vo
 static void glad_gl_load_GL_OVR_multiview( GLADuserptrloadfunc load, void* userptr) {
     if(!GLAD_GL_OVR_multiview) return;
     glad_glFramebufferTextureMultiviewOVR = (PFNGLFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC) load(userptr, "glFramebufferTextureMultiviewOVR");
+    glad_glNamedFramebufferTextureMultiviewOVR = (PFNGLNAMEDFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC) load(userptr, "glNamedFramebufferTextureMultiviewOVR");
 }
 
 
 
-#if defined(GL_ES_VERSION_3_0) || defined(GL_VERSION_3_0)
-#define GLAD_GL_IS_SOME_NEW_VERSION 1
-#else
-#define GLAD_GL_IS_SOME_NEW_VERSION 0
-#endif
-
-static int glad_gl_get_extensions( int version, const char **out_exts, unsigned int *out_num_exts_i, char ***out_exts_i) {
-#if GLAD_GL_IS_SOME_NEW_VERSION
-    if(GLAD_VERSION_MAJOR(version) < 3) {
-#else
-    GLAD_UNUSED(version);
-    GLAD_UNUSED(out_num_exts_i);
-    GLAD_UNUSED(out_exts_i);
-#endif
-        if (glad_glGetString == NULL) {
-            return 0;
+static void glad_gl_free_extensions(char **exts_i) {
+    if (exts_i != NULL) {
+        unsigned int index;
+        for(index = 0; exts_i[index]; index++) {
+            free((void *) (exts_i[index]));
         }
-        *out_exts = (const char *)glad_glGetString(GL_EXTENSIONS);
-#if GLAD_GL_IS_SOME_NEW_VERSION
-    } else {
+        free((void *)exts_i);
+        exts_i = NULL;
+    }
+}
+static int glad_gl_get_extensions( const char **out_exts, char ***out_exts_i) {
+#if defined(GL_ES_VERSION_3_0) || defined(GL_VERSION_3_0)
+    if (glad_glGetStringi != NULL && glad_glGetIntegerv != NULL) {
         unsigned int index = 0;
         unsigned int num_exts_i = 0;
         char **exts_i = NULL;
-        if (glad_glGetStringi == NULL || glad_glGetIntegerv == NULL) {
-            return 0;
-        }
         glad_glGetIntegerv(GL_NUM_EXTENSIONS, (int*) &num_exts_i);
-        if (num_exts_i > 0) {
-            exts_i = (char **) malloc(num_exts_i * (sizeof *exts_i));
-        }
+        exts_i = (char **) malloc((num_exts_i + 1) * (sizeof *exts_i));
         if (exts_i == NULL) {
             return 0;
         }
@@ -2150,31 +2140,40 @@ static int glad_gl_get_extensions( int version, const char **out_exts, unsigned
             size_t len = strlen(gl_str_tmp) + 1;
 
             char *local_str = (char*) malloc(len * sizeof(char));
-            if(local_str != NULL) {
-                memcpy(local_str, gl_str_tmp, len * sizeof(char));
+            if(local_str == NULL) {
+                exts_i[index] = NULL;
+                glad_gl_free_extensions(exts_i);
+                return 0;
             }
 
+            memcpy(local_str, gl_str_tmp, len * sizeof(char));
             exts_i[index] = local_str;
         }
+        exts_i[index] = NULL;
 
-        *out_num_exts_i = num_exts_i;
         *out_exts_i = exts_i;
+
+        return 1;
     }
+#else
+    GLAD_UNUSED(out_exts_i);
 #endif
+    if (glad_glGetString == NULL) {
+        return 0;
+    }
+    *out_exts = (const char *)glad_glGetString(GL_EXTENSIONS);
     return 1;
 }
-static void glad_gl_free_extensions(char **exts_i, unsigned int num_exts_i) {
-    if (exts_i != NULL) {
+static int glad_gl_has_extension(const char *exts, char **exts_i, const char *ext) {
+    if(exts_i) {
         unsigned int index;
-        for(index = 0; index < num_exts_i; index++) {
-            free((void *) (exts_i[index]));
+        for(index = 0; exts_i[index]; index++) {
+            const char *e = exts_i[index];
+            if(strcmp(e, ext) == 0) {
+                return 1;
+            }
         }
-        free((void *)exts_i);
-        exts_i = NULL;
-    }
-}
-static int glad_gl_has_extension(int version, const char *exts, unsigned int num_exts_i, char **exts_i, const char *ext) {
-    if(GLAD_VERSION_MAJOR(version) < 3 || !GLAD_GL_IS_SOME_NEW_VERSION) {
+    } else {
         const char *extensions;
         const char *loc;
         const char *terminator;
@@ -2194,14 +2193,6 @@ static int glad_gl_has_extension(int version, const char *exts, unsigned int num
             }
             extensions = terminator;
         }
-    } else {
-        unsigned int index;
-        for(index = 0; index < num_exts_i; index++) {
-            const char *e = exts_i[index];
-            if(strcmp(e, ext) == 0) {
-                return 1;
-            }
-        }
     }
     return 0;
 }
@@ -2210,22 +2201,21 @@ static GLADapiproc glad_gl_get_proc_from_userptr(void *userptr, const char* name
     return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name);
 }
 
-static int glad_gl_find_extensions_gl( int version) {
+static int glad_gl_find_extensions_gl(void) {
     const char *exts = NULL;
-    unsigned int num_exts_i = 0;
     char **exts_i = NULL;
-    if (!glad_gl_get_extensions(version, &exts, &num_exts_i, &exts_i)) return 0;
+    if (!glad_gl_get_extensions(&exts, &exts_i)) return 0;
 
-    GLAD_GL_ARB_debug_output = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_ARB_debug_output");
-    GLAD_GL_ARB_framebuffer_object = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_ARB_framebuffer_object");
-    GLAD_GL_ARB_get_program_binary = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_ARB_get_program_binary");
-    GLAD_GL_EXT_framebuffer_blit = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_EXT_framebuffer_blit");
-    GLAD_GL_EXT_framebuffer_multisample = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_EXT_framebuffer_multisample");
-    GLAD_GL_EXT_framebuffer_object = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_EXT_framebuffer_object");
-    GLAD_GL_OVR_multiview = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_OVR_multiview");
-    GLAD_GL_OVR_multiview2 = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_OVR_multiview2");
+    GLAD_GL_ARB_debug_output = glad_gl_has_extension(exts, exts_i, "GL_ARB_debug_output");
+    GLAD_GL_ARB_framebuffer_object = glad_gl_has_extension(exts, exts_i, "GL_ARB_framebuffer_object");
+    GLAD_GL_ARB_get_program_binary = glad_gl_has_extension(exts, exts_i, "GL_ARB_get_program_binary");
+    GLAD_GL_EXT_framebuffer_blit = glad_gl_has_extension(exts, exts_i, "GL_EXT_framebuffer_blit");
+    GLAD_GL_EXT_framebuffer_multisample = glad_gl_has_extension(exts, exts_i, "GL_EXT_framebuffer_multisample");
+    GLAD_GL_EXT_framebuffer_object = glad_gl_has_extension(exts, exts_i, "GL_EXT_framebuffer_object");
+    GLAD_GL_OVR_multiview = glad_gl_has_extension(exts, exts_i, "GL_OVR_multiview");
+    GLAD_GL_OVR_multiview2 = glad_gl_has_extension(exts, exts_i, "GL_OVR_multiview2");
 
-    glad_gl_free_extensions(exts_i, num_exts_i);
+    glad_gl_free_extensions(exts_i);
 
     return 1;
 }
@@ -2275,7 +2265,6 @@ int gladLoadGLUserPtr( GLADuserptrloadfunc load, void *userptr) {
 
     glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString");
     if(glad_glGetString == NULL) return 0;
-    if(glad_glGetString(GL_VERSION) == NULL) return 0;
     version = glad_gl_find_core_gl();
 
     glad_gl_load_GL_VERSION_1_0(load, userptr);
@@ -2291,7 +2280,7 @@ int gladLoadGLUserPtr( GLADuserptrloadfunc load, void *userptr) {
     glad_gl_load_GL_VERSION_3_2(load, userptr);
     glad_gl_load_GL_VERSION_3_3(load, userptr);
 
-    if (!glad_gl_find_extensions_gl(version)) return 0;
+    if (!glad_gl_find_extensions_gl()) return 0;
     glad_gl_load_GL_ARB_debug_output(load, userptr);
     glad_gl_load_GL_ARB_framebuffer_object(load, userptr);
     glad_gl_load_GL_ARB_get_program_binary(load, userptr);
@@ -2310,16 +2299,15 @@ int gladLoadGL( GLADloadfunc load) {
     return gladLoadGLUserPtr( glad_gl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load);
 }
 
-static int glad_gl_find_extensions_gles2( int version) {
+static int glad_gl_find_extensions_gles2(void) {
     const char *exts = NULL;
-    unsigned int num_exts_i = 0;
     char **exts_i = NULL;
-    if (!glad_gl_get_extensions(version, &exts, &num_exts_i, &exts_i)) return 0;
+    if (!glad_gl_get_extensions(&exts, &exts_i)) return 0;
 
-    GLAD_GL_OVR_multiview = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_OVR_multiview");
-    GLAD_GL_OVR_multiview2 = glad_gl_has_extension(version, exts, num_exts_i, exts_i, "GL_OVR_multiview2");
+    GLAD_GL_OVR_multiview = glad_gl_has_extension(exts, exts_i, "GL_OVR_multiview");
+    GLAD_GL_OVR_multiview2 = glad_gl_has_extension(exts, exts_i, "GL_OVR_multiview2");
 
-    glad_gl_free_extensions(exts_i, num_exts_i);
+    glad_gl_free_extensions(exts_i);
 
     return 1;
 }
@@ -2361,7 +2349,6 @@ int gladLoadGLES2UserPtr( GLADuserptrloadfunc load, void *userptr) {
 
     glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString");
     if(glad_glGetString == NULL) return 0;
-    if(glad_glGetString(GL_VERSION) == NULL) return 0;
     version = glad_gl_find_core_gles2();
 
     glad_gl_load_GL_ES_VERSION_2_0(load, userptr);
@@ -2369,7 +2356,7 @@ int gladLoadGLES2UserPtr( GLADuserptrloadfunc load, void *userptr) {
     glad_gl_load_GL_ES_VERSION_3_1(load, userptr);
     glad_gl_load_GL_ES_VERSION_3_2(load, userptr);
 
-    if (!glad_gl_find_extensions_gles2(version)) return 0;
+    if (!glad_gl_find_extensions_gles2()) return 0;
     glad_gl_load_GL_OVR_multiview(load, userptr);
 
 
@@ -2627,10 +2614,9 @@ static GLADapiproc glad_dlsym_handle(void* handle, const char *name) {
   typedef __eglMustCastToProperFunctionPointerType (GLAD_API_PTR *PFNEGLGETPROCADDRESSPROC)(const char *name);
 #endif
   extern __eglMustCastToProperFunctionPointerType emscripten_GetProcAddress(const char *name);
-#elif EGL_STATIC
-  typedef void (*__eglMustCastToProperFunctionPointerType)(void);
+#elif defined(GLAD_GLES2_USE_SYSTEM_EGL)
+  #include <EGL/egl.h>
   typedef __eglMustCastToProperFunctionPointerType (GLAD_API_PTR *PFNEGLGETPROCADDRESSPROC)(const char *name);
-  extern __eglMustCastToProperFunctionPointerType GLAD_API_PTR eglGetProcAddress(const char *name);
 #else
   #include <glad/egl.h>
 #endif
@@ -2658,7 +2644,7 @@ static GLADapiproc glad_gles2_get_proc(void *vuserptr, const char* name) {
     return result;
 }
 
-static void* _glad_GL_loader_handle = NULL;
+static void* _glad_GLES2_loader_handle = NULL;
 
 static void* glad_gles2_dlopen_handle(void) {
 #if GLAD_PLATFORM_EMSCRIPTEN
@@ -2674,11 +2660,11 @@ static void* glad_gles2_dlopen_handle(void) {
     GLAD_UNUSED(glad_get_dlopen_handle);
     return NULL;
 #else
-    if (_glad_GL_loader_handle == NULL) {
-        _glad_GL_loader_handle = glad_get_dlopen_handle(NAMES, sizeof(NAMES) / sizeof(NAMES[0]));
+    if (_glad_GLES2_loader_handle == NULL) {
+        _glad_GLES2_loader_handle = glad_get_dlopen_handle(NAMES, sizeof(NAMES) / sizeof(NAMES[0]));
     }
 
-    return _glad_GL_loader_handle;
+    return _glad_GLES2_loader_handle;
 #endif
 }
 
@@ -2708,11 +2694,12 @@ int gladLoaderLoadGLES2(void) {
     userptr.get_proc_address_ptr = emscripten_GetProcAddress;
     version = gladLoadGLES2UserPtr(glad_gles2_get_proc, &userptr);
 #else
+#ifndef GLAD_GLES2_USE_SYSTEM_EGL
     if (eglGetProcAddress == NULL) {
         return 0;
     }
-
-    did_load = _glad_GL_loader_handle == NULL;
+#endif
+    did_load = _glad_GLES2_loader_handle == NULL;
     handle = glad_gles2_dlopen_handle();
     if (handle != NULL) {
         userptr = glad_gles2_build_userptr(handle);
@@ -2731,9 +2718,9 @@ int gladLoaderLoadGLES2(void) {
 
 
 void gladLoaderUnloadGLES2(void) {
-    if (_glad_GL_loader_handle != NULL) {
-        glad_close_dlopen_handle(_glad_GL_loader_handle);
-        _glad_GL_loader_handle = NULL;
+    if (_glad_GLES2_loader_handle != NULL) {
+        glad_close_dlopen_handle(_glad_GLES2_loader_handle);
+        _glad_GLES2_loader_handle = NULL;
     }
 }
 
diff --git a/thirdparty/glad/glad/egl.h b/thirdparty/glad/glad/egl.h
index 1bf35c1404..053c5853a7 100644
--- a/thirdparty/glad/glad/egl.h
+++ b/thirdparty/glad/glad/egl.h
@@ -1,5 +1,5 @@
 /**
- * Loader generated by glad 2.0.3 on Fri Feb  3 07:06:48 2023
+ * Loader generated by glad 2.0.6 on Fri Apr  5 08:17:09 2024
  *
  * SPDX-License-Identifier: (WTFPL OR CC0-1.0) AND Apache-2.0
  *
@@ -141,7 +141,7 @@ extern "C" {
 #define GLAD_VERSION_MAJOR(version) (version / 10000)
 #define GLAD_VERSION_MINOR(version) (version % 10000)
 
-#define GLAD_GENERATOR_VERSION "2.0.3"
+#define GLAD_GENERATOR_VERSION "2.0.6"
 
 typedef void (*GLADapiproc)(void);
 
diff --git a/thirdparty/glad/glad/gl.h b/thirdparty/glad/glad/gl.h
index 307ea4dbb8..1301d10b65 100644
--- a/thirdparty/glad/glad/gl.h
+++ b/thirdparty/glad/glad/gl.h
@@ -1,5 +1,5 @@
 /**
- * Loader generated by glad 2.0.4 on Mon May 22 13:18:29 2023
+ * Loader generated by glad 2.0.6 on Fri Apr  5 08:14:44 2024
  *
  * SPDX-License-Identifier: (WTFPL OR CC0-1.0) AND Apache-2.0
  *
@@ -178,7 +178,7 @@ extern "C" {
 #define GLAD_VERSION_MAJOR(version) (version / 10000)
 #define GLAD_VERSION_MINOR(version) (version % 10000)
 
-#define GLAD_GENERATOR_VERSION "2.0.4"
+#define GLAD_GENERATOR_VERSION "2.0.6"
 
 typedef void (*GLADapiproc)(void);
 
@@ -2394,6 +2394,7 @@ typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP3UIPROC)(GLenum texture, GLenum t
 typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP3UIVPROC)(GLenum texture, GLenum type, const GLuint * coords);
 typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP4UIPROC)(GLenum texture, GLenum type, GLuint coords);
 typedef void (GLAD_API_PTR *PFNGLMULTITEXCOORDP4UIVPROC)(GLenum texture, GLenum type, const GLuint * coords);
+typedef void (GLAD_API_PTR *PFNGLNAMEDFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC)(GLuint framebuffer, GLenum attachment, GLuint texture, GLint level, GLint baseViewIndex, GLsizei numViews);
 typedef void (GLAD_API_PTR *PFNGLNEWLISTPROC)(GLuint list, GLenum mode);
 typedef void (GLAD_API_PTR *PFNGLNORMAL3BPROC)(GLbyte nx, GLbyte ny, GLbyte nz);
 typedef void (GLAD_API_PTR *PFNGLNORMAL3BVPROC)(const GLbyte * v);
@@ -3654,6 +3655,8 @@ GLAD_API_CALL PFNGLMULTITEXCOORDP4UIPROC glad_glMultiTexCoordP4ui;
 #define glMultiTexCoordP4ui glad_glMultiTexCoordP4ui
 GLAD_API_CALL PFNGLMULTITEXCOORDP4UIVPROC glad_glMultiTexCoordP4uiv;
 #define glMultiTexCoordP4uiv glad_glMultiTexCoordP4uiv
+GLAD_API_CALL PFNGLNAMEDFRAMEBUFFERTEXTUREMULTIVIEWOVRPROC glad_glNamedFramebufferTextureMultiviewOVR;
+#define glNamedFramebufferTextureMultiviewOVR glad_glNamedFramebufferTextureMultiviewOVR
 GLAD_API_CALL PFNGLNEWLISTPROC glad_glNewList;
 #define glNewList glad_glNewList
 GLAD_API_CALL PFNGLNORMAL3BPROC glad_glNormal3b;
diff --git a/thirdparty/glad/glad/glx.h b/thirdparty/glad/glad/glx.h
index cf7663a3af..a2fa0dadee 100644
--- a/thirdparty/glad/glad/glx.h
+++ b/thirdparty/glad/glad/glx.h
@@ -1,5 +1,5 @@
 /**
- * Loader generated by glad 2.0.4 on Mon May 22 13:18:29 2023
+ * Loader generated by glad 2.0.6 on Fri Apr  5 08:14:31 2024
  *
  * SPDX-License-Identifier: (WTFPL OR CC0-1.0) AND Apache-2.0
  *
@@ -152,7 +152,7 @@ extern "C" {
 #define GLAD_VERSION_MAJOR(version) (version / 10000)
 #define GLAD_VERSION_MINOR(version) (version % 10000)
 
-#define GLAD_GENERATOR_VERSION "2.0.4"
+#define GLAD_GENERATOR_VERSION "2.0.6"
 
 typedef void (*GLADapiproc)(void);
 
diff --git a/thirdparty/glad/patches/patch_enable_both_gl_and_gles.diff b/thirdparty/glad/patches/patch_enable_both_gl_and_gles.diff
index a98efe51d8..88c5510166 100644
--- a/thirdparty/glad/patches/patch_enable_both_gl_and_gles.diff
+++ b/thirdparty/glad/patches/patch_enable_both_gl_and_gles.diff
@@ -1,8 +1,8 @@
 diff --git a/thirdparty/glad/gl.c b/thirdparty/glad/gl.c
-index a0b59dbbfb..9f10f6544a 100644
+index 3f0884a3dc..38ecb514bd 100644
 --- a/thirdparty/glad/gl.c
 +++ b/thirdparty/glad/gl.c
-@@ -2475,7 +2475,7 @@ static GLADapiproc glad_gl_get_proc(void *vuserptr, const char *name) {
+@@ -2462,7 +2462,7 @@ static GLADapiproc glad_gl_get_proc(void *vuserptr, const char *name) {
      return result;
  }
  
@@ -11,7 +11,7 @@ index a0b59dbbfb..9f10f6544a 100644
  
  static void* glad_gl_dlopen_handle(void) {
  #if GLAD_PLATFORM_APPLE
-@@ -2497,11 +2497,11 @@ static void* glad_gl_dlopen_handle(void) {
+@@ -2484,11 +2484,11 @@ static void* glad_gl_dlopen_handle(void) {
      };
  #endif
  
@@ -26,7 +26,7 @@ index a0b59dbbfb..9f10f6544a 100644
  }
  
  static struct _glad_gl_userptr glad_gl_build_userptr(void *handle) {
-@@ -2527,7 +2527,7 @@ int gladLoaderLoadGL(void) {
+@@ -2514,7 +2514,7 @@ int gladLoaderLoadGL(void) {
      int did_load = 0;
      struct _glad_gl_userptr userptr;
  
@@ -35,7 +35,7 @@ index a0b59dbbfb..9f10f6544a 100644
      handle = glad_gl_dlopen_handle();
      if (handle) {
          userptr = glad_gl_build_userptr(handle);
-@@ -2545,9 +2545,9 @@ int gladLoaderLoadGL(void) {
+@@ -2532,9 +2532,9 @@ int gladLoaderLoadGL(void) {
  
  
  void gladLoaderUnloadGL(void) {
@@ -49,7 +49,7 @@ index a0b59dbbfb..9f10f6544a 100644
  }
  
 diff --git a/thirdparty/glad/glad/gl.h b/thirdparty/glad/glad/gl.h
-index 905c16aeed..f3cb7d8cb5 100644
+index 77c6f33cab..1301d10b65 100644
 --- a/thirdparty/glad/glad/gl.h
 +++ b/thirdparty/glad/glad/gl.h
 @@ -67,6 +67,7 @@
diff --git a/thirdparty/harfbuzz/src/OT/Color/COLR/COLR.hh b/thirdparty/harfbuzz/src/OT/Color/COLR/COLR.hh
index b632a1d9eb..623775a771 100644
--- a/thirdparty/harfbuzz/src/OT/Color/COLR/COLR.hh
+++ b/thirdparty/harfbuzz/src/OT/Color/COLR/COLR.hh
@@ -68,7 +68,7 @@ public:
   hb_font_t *font;
   unsigned int palette_index;
   hb_color_t foreground;
-  VarStoreInstancer &instancer;
+  ItemVarStoreInstancer &instancer;
   hb_map_t current_glyphs;
   hb_map_t current_layers;
   int depth_left = HB_MAX_NESTING_LEVEL;
@@ -80,7 +80,7 @@ public:
                       hb_font_t *font_,
                       unsigned int palette_,
                       hb_color_t foreground_,
-		      VarStoreInstancer &instancer_) :
+		      ItemVarStoreInstancer &instancer_) :
     base (base_),
     funcs (funcs_),
     data (data_),
@@ -245,7 +245,7 @@ struct Variable
   { value.closurev1 (c); }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer) const
+               const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SUBSET (this);
     if (!value.subset (c, instancer, varIdxBase)) return_trace (false);
@@ -270,7 +270,7 @@ struct Variable
 
   void get_color_stop (hb_paint_context_t *c,
                        hb_color_stop_t *stop,
-		       const VarStoreInstancer &instancer) const
+		       const ItemVarStoreInstancer &instancer) const
   {
     value.get_color_stop (c, stop, varIdxBase, instancer);
   }
@@ -305,7 +305,7 @@ struct NoVariable
   { value.closurev1 (c); }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer) const
+               const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SUBSET (this);
     return_trace (value.subset (c, instancer, varIdxBase));
@@ -325,7 +325,7 @@ struct NoVariable
 
   void get_color_stop (hb_paint_context_t *c,
                        hb_color_stop_t *stop,
-		       const VarStoreInstancer &instancer) const
+		       const ItemVarStoreInstancer &instancer) const
   {
     value.get_color_stop (c, stop, VarIdx::NO_VARIATION, instancer);
   }
@@ -348,7 +348,7 @@ struct ColorStop
   { c->add_palette_index (paletteIndex); }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -374,7 +374,7 @@ struct ColorStop
   void get_color_stop (hb_paint_context_t *c,
                        hb_color_stop_t *out,
 		       uint32_t varIdx,
-		       const VarStoreInstancer &instancer) const
+		       const ItemVarStoreInstancer &instancer) const
   {
     out->offset = stopOffset.to_float(instancer (varIdx, 0));
     out->color = c->get_color (paletteIndex,
@@ -410,7 +410,7 @@ struct ColorLine
   }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer) const
+               const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SUBSET (this);
     auto *out = c->serializer->start_embed (this);
@@ -439,7 +439,7 @@ struct ColorLine
                    unsigned int start,
 		   unsigned int *count,
 		   hb_color_stop_t *color_stops,
-		   const VarStoreInstancer &instancer) const
+		   const ItemVarStoreInstancer &instancer) const
   {
     unsigned int len = stops.len;
 
@@ -543,7 +543,7 @@ struct Affine2x3
   }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -588,7 +588,7 @@ struct PaintColrLayers
   void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer HB_UNUSED) const
+               const ItemVarStoreInstancer &instancer HB_UNUSED) const
   {
     TRACE_SUBSET (this);
     auto *out = c->serializer->embed (this);
@@ -620,7 +620,7 @@ struct PaintSolid
   { c->add_palette_index (paletteIndex); }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -669,7 +669,7 @@ struct PaintLinearGradient
   { (this+colorLine).closurev1 (c); }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -736,7 +736,7 @@ struct PaintRadialGradient
   { (this+colorLine).closurev1 (c); }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -803,7 +803,7 @@ struct PaintSweepGradient
   { (this+colorLine).closurev1 (c); }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -863,7 +863,7 @@ struct PaintGlyph
   void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer) const
+               const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SUBSET (this);
     auto *out = c->serializer->embed (this);
@@ -906,7 +906,7 @@ struct PaintColrGlyph
   void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer HB_UNUSED) const
+               const ItemVarStoreInstancer &instancer HB_UNUSED) const
   {
     TRACE_SUBSET (this);
     auto *out = c->serializer->embed (this);
@@ -936,7 +936,7 @@ struct PaintTransform
   HB_INTERNAL void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer) const
+               const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SUBSET (this);
     auto *out = c->serializer->embed (this);
@@ -975,7 +975,7 @@ struct PaintTranslate
   HB_INTERNAL void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -1024,7 +1024,7 @@ struct PaintScale
   HB_INTERNAL void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -1073,7 +1073,7 @@ struct PaintScaleAroundCenter
   HB_INTERNAL void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -1132,7 +1132,7 @@ struct PaintScaleUniform
   HB_INTERNAL void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -1176,7 +1176,7 @@ struct PaintScaleUniformAroundCenter
   HB_INTERNAL void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -1232,7 +1232,7 @@ struct PaintRotate
   HB_INTERNAL void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -1276,7 +1276,7 @@ struct PaintRotateAroundCenter
   HB_INTERNAL void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -1332,7 +1332,7 @@ struct PaintSkew
   HB_INTERNAL void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -1381,7 +1381,7 @@ struct PaintSkewAroundCenter
   HB_INTERNAL void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -1440,7 +1440,7 @@ struct PaintComposite
   void closurev1 (hb_colrv1_closure_context_t* c) const;
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer) const
+               const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SUBSET (this);
     auto *out = c->serializer->embed (this);
@@ -1491,7 +1491,7 @@ struct ClipBoxFormat1
     return_trace (c->check_struct (this));
   }
 
-  void get_clip_box (ClipBoxData &clip_box, const VarStoreInstancer &instancer HB_UNUSED) const
+  void get_clip_box (ClipBoxData &clip_box, const ItemVarStoreInstancer &instancer HB_UNUSED) const
   {
     clip_box.xMin = xMin;
     clip_box.yMin = yMin;
@@ -1500,7 +1500,7 @@ struct ClipBoxFormat1
   }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer,
+               const ItemVarStoreInstancer &instancer,
                uint32_t varIdxBase) const
   {
     TRACE_SUBSET (this);
@@ -1533,7 +1533,7 @@ struct ClipBoxFormat1
 
 struct ClipBoxFormat2 : Variable<ClipBoxFormat1>
 {
-  void get_clip_box (ClipBoxData &clip_box, const VarStoreInstancer &instancer) const
+  void get_clip_box (ClipBoxData &clip_box, const ItemVarStoreInstancer &instancer) const
   {
     value.get_clip_box(clip_box, instancer);
     if (instancer)
@@ -1549,7 +1549,7 @@ struct ClipBoxFormat2 : Variable<ClipBoxFormat1>
 struct ClipBox
 {
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer) const
+               const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SUBSET (this);
     switch (u.format) {
@@ -1572,7 +1572,7 @@ struct ClipBox
   }
 
   bool get_extents (hb_glyph_extents_t *extents,
-                    const VarStoreInstancer &instancer) const
+                    const ItemVarStoreInstancer &instancer) const
   {
     ClipBoxData clip_box;
     switch (u.format) {
@@ -1608,7 +1608,7 @@ struct ClipRecord
 
   bool subset (hb_subset_context_t *c,
                const void *base,
-               const VarStoreInstancer &instancer) const
+               const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SUBSET (this);
     auto *out = c->serializer->embed (*this);
@@ -1625,7 +1625,7 @@ struct ClipRecord
 
   bool get_extents (hb_glyph_extents_t *extents,
 		    const void *base,
-		    const VarStoreInstancer &instancer) const
+		    const ItemVarStoreInstancer &instancer) const
   {
     return (base+clipBox).get_extents (extents, instancer);
   }
@@ -1642,7 +1642,7 @@ DECLARE_NULL_NAMESPACE_BYTES (OT, ClipRecord);
 struct ClipList
 {
   unsigned serialize_clip_records (hb_subset_context_t *c,
-                                   const VarStoreInstancer &instancer,
+                                   const ItemVarStoreInstancer &instancer,
                                    const hb_set_t& gids,
                                    const hb_map_t& gid_offset_map) const
   {
@@ -1695,7 +1695,7 @@ struct ClipList
   }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer) const
+               const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SUBSET (this);
     auto *out = c->serializer->start_embed (*this);
@@ -1735,7 +1735,7 @@ struct ClipList
   bool
   get_extents (hb_codepoint_t gid,
 	       hb_glyph_extents_t *extents,
-	       const VarStoreInstancer &instancer) const
+	       const ItemVarStoreInstancer &instancer) const
   {
     auto *rec = clips.as_array ().bsearch (gid);
     if (rec)
@@ -1855,7 +1855,7 @@ struct BaseGlyphPaintRecord
 
   bool serialize (hb_serialize_context_t *s, const hb_map_t* glyph_map,
                   const void* src_base, hb_subset_context_t *c,
-                  const VarStoreInstancer &instancer) const
+                  const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SERIALIZE (this);
     auto *out = s->embed (this);
@@ -1884,7 +1884,7 @@ struct BaseGlyphPaintRecord
 struct BaseGlyphList : SortedArray32Of<BaseGlyphPaintRecord>
 {
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer) const
+               const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SUBSET (this);
     auto *out = c->serializer->start_embed (this);
@@ -1916,7 +1916,7 @@ struct LayerList : Array32OfOffset32To<Paint>
   { return this+(*this)[i]; }
 
   bool subset (hb_subset_context_t *c,
-               const VarStoreInstancer &instancer) const
+               const ItemVarStoreInstancer &instancer) const
   {
     TRACE_SUBSET (this);
     auto *out = c->serializer->start_embed (this);
@@ -2206,7 +2206,7 @@ struct COLR
     auto snap = c->serializer->snapshot ();
     if (!c->serializer->allocate_size<void> (5 * HBUINT32::static_size)) return_trace (false);
 
-    VarStoreInstancer instancer (varStore ? &(this+varStore) : nullptr,
+    ItemVarStoreInstancer instancer (varStore ? &(this+varStore) : nullptr,
 	                         varIdxMap ? &(this+varIdxMap) : nullptr,
 	                         c->plan->normalized_coords.as_array ());
 
@@ -2250,7 +2250,7 @@ struct COLR
     if (version != 1)
       return false;
 
-    VarStoreInstancer instancer (&(this+varStore),
+    ItemVarStoreInstancer instancer (&(this+varStore),
 				 &(this+varIdxMap),
 				 hb_array (font->coords, font->num_coords));
 
@@ -2301,7 +2301,7 @@ struct COLR
 
   bool get_clip (hb_codepoint_t glyph,
 		 hb_glyph_extents_t *extents,
-		 const VarStoreInstancer instancer) const
+		 const ItemVarStoreInstancer instancer) const
   {
     return (this+clipList).get_extents (glyph,
 					extents,
@@ -2312,7 +2312,7 @@ struct COLR
   bool
   paint_glyph (hb_font_t *font, hb_codepoint_t glyph, hb_paint_funcs_t *funcs, void *data, unsigned int palette_index, hb_color_t foreground, bool clip = true) const
   {
-    VarStoreInstancer instancer (&(this+varStore),
+    ItemVarStoreInstancer instancer (&(this+varStore),
 	                         &(this+varIdxMap),
 	                         hb_array (font->coords, font->num_coords));
     hb_paint_context_t c (this, funcs, data, font, palette_index, foreground, instancer);
@@ -2327,7 +2327,7 @@ struct COLR
       {
         // COLRv1 glyph
 
-	VarStoreInstancer instancer (&(this+varStore),
+	ItemVarStoreInstancer instancer (&(this+varStore),
 				     &(this+varIdxMap),
 				     hb_array (font->coords, font->num_coords));
 
@@ -2413,7 +2413,7 @@ struct COLR
   Offset32To<LayerList>			layerList;
   Offset32To<ClipList>			clipList;   // Offset to ClipList table (may be NULL)
   Offset32To<DeltaSetIndexMap>		varIdxMap;  // Offset to DeltaSetIndexMap table (may be NULL)
-  Offset32To<VariationStore>		varStore;
+  Offset32To<ItemVariationStore>	varStore;
   public:
   DEFINE_SIZE_MIN (14);
 };
diff --git a/thirdparty/harfbuzz/src/OT/Layout/GDEF/GDEF.hh b/thirdparty/harfbuzz/src/OT/Layout/GDEF/GDEF.hh
index 14a9b5e5cd..317b96c714 100644
--- a/thirdparty/harfbuzz/src/OT/Layout/GDEF/GDEF.hh
+++ b/thirdparty/harfbuzz/src/OT/Layout/GDEF/GDEF.hh
@@ -189,7 +189,7 @@ struct CaretValueFormat3
   friend struct CaretValue;
 
   hb_position_t get_caret_value (hb_font_t *font, hb_direction_t direction,
-				 const VariationStore &var_store) const
+				 const ItemVariationStore &var_store) const
   {
     return HB_DIRECTION_IS_HORIZONTAL (direction) ?
 	   font->em_scale_x (coordinate) + (this+deviceTable).get_x_delta (font, var_store) :
@@ -251,7 +251,7 @@ struct CaretValue
   hb_position_t get_caret_value (hb_font_t *font,
 				 hb_direction_t direction,
 				 hb_codepoint_t glyph_id,
-				 const VariationStore &var_store) const
+				 const ItemVariationStore &var_store) const
   {
     switch (u.format) {
     case 1: return u.format1.get_caret_value (font, direction);
@@ -316,7 +316,7 @@ struct LigGlyph
   unsigned get_lig_carets (hb_font_t            *font,
 			   hb_direction_t        direction,
 			   hb_codepoint_t        glyph_id,
-			   const VariationStore &var_store,
+			   const ItemVariationStore &var_store,
 			   unsigned              start_offset,
 			   unsigned             *caret_count /* IN/OUT */,
 			   hb_position_t        *caret_array /* OUT */) const
@@ -372,7 +372,7 @@ struct LigCaretList
   unsigned int get_lig_carets (hb_font_t *font,
 			       hb_direction_t direction,
 			       hb_codepoint_t glyph_id,
-			       const VariationStore &var_store,
+			       const ItemVariationStore &var_store,
 			       unsigned int start_offset,
 			       unsigned int *caret_count /* IN/OUT */,
 			       hb_position_t *caret_array /* OUT */) const
@@ -609,7 +609,7 @@ struct GDEFVersion1_2
 					 * definitions--from beginning of GDEF
 					 * header (may be NULL).  Introduced
 					 * in version 0x00010002. */
-  Offset32To<VariationStore>
+  Offset32To<ItemVariationStore>
 		varStore;		/* Offset to the table of Item Variation
 					 * Store--from beginning of GDEF
 					 * header (may be NULL).  Introduced
@@ -663,21 +663,16 @@ struct GDEFVersion1_2
     auto *out = c->serializer->start_embed (*this);
     if (unlikely (!c->serializer->extend_min (out))) return_trace (false);
 
-    out->version.major = version.major;
-    out->version.minor = version.minor;
-    bool subset_glyphclassdef = out->glyphClassDef.serialize_subset (c, glyphClassDef, this, nullptr, false, true);
-    bool subset_attachlist = out->attachList.serialize_subset (c, attachList, this);
-    bool subset_markattachclassdef = out->markAttachClassDef.serialize_subset (c, markAttachClassDef, this, nullptr, false, true);
-
-    bool subset_markglyphsetsdef = false;
+    // Push var store first (if it's needed) so that it's last in the
+    // serialization order. Some font consumers assume that varstore runs to
+    // the end of the GDEF table.
+    // See: https://github.com/harfbuzz/harfbuzz/issues/4636
     auto snapshot_version0 = c->serializer->snapshot ();
-    if (version.to_int () >= 0x00010002u)
-    {
-      if (unlikely (!c->serializer->embed (markGlyphSetsDef))) return_trace (false);
-      subset_markglyphsetsdef = out->markGlyphSetsDef.serialize_subset (c, markGlyphSetsDef, this);
-    }
+    if (unlikely (version.to_int () >= 0x00010002u && !c->serializer->embed (markGlyphSetsDef)))
+      return_trace (false);
 
     bool subset_varstore = false;
+    unsigned varstore_index = (unsigned) -1;
     auto snapshot_version2 = c->serializer->snapshot ();
     if (version.to_int () >= 0x00010003u)
     {
@@ -690,35 +685,58 @@ struct GDEFVersion1_2
         {
           item_variations_t item_vars;
           if (item_vars.instantiate (this+varStore, c->plan, true, true,
-                                     c->plan->gdef_varstore_inner_maps.as_array ()))
+                                     c->plan->gdef_varstore_inner_maps.as_array ())) {
             subset_varstore = out->varStore.serialize_serialize (c->serializer,
                                                                  item_vars.has_long_word (),
                                                                  c->plan->axis_tags,
                                                                  item_vars.get_region_list (),
                                                                  item_vars.get_vardata_encodings ());
+            varstore_index = c->serializer->last_added_child_index();
+          }
           remap_varidx_after_instantiation (item_vars.get_varidx_map (),
                                             c->plan->layout_variation_idx_delta_map);
         }
       }
       else
+      {
         subset_varstore = out->varStore.serialize_subset (c, varStore, this, c->plan->gdef_varstore_inner_maps.as_array ());
+        varstore_index = c->serializer->last_added_child_index();
+      }
+    }
+
+    out->version.major = version.major;
+    out->version.minor = version.minor;
+
+    if (!subset_varstore && version.to_int () >= 0x00010002u) {
+      c->serializer->revert (snapshot_version2);
     }
 
+    bool subset_markglyphsetsdef = false;
+    if (version.to_int () >= 0x00010002u)
+    {
+      subset_markglyphsetsdef = out->markGlyphSetsDef.serialize_subset (c, markGlyphSetsDef, this);
+    }
 
     if (subset_varstore)
     {
       out->version.minor = 3;
       c->plan->has_gdef_varstore = true;
     } else if (subset_markglyphsetsdef) {
-      out->version.minor = 2;
-      c->serializer->revert (snapshot_version2);
+      out->version.minor = 2;      
     } else  {
       out->version.minor = 0;
       c->serializer->revert (snapshot_version0);
     }
 
+    bool subset_glyphclassdef = out->glyphClassDef.serialize_subset (c, glyphClassDef, this, nullptr, false, true);
+    bool subset_attachlist = out->attachList.serialize_subset (c, attachList, this);
+    bool subset_markattachclassdef = out->markAttachClassDef.serialize_subset (c, markAttachClassDef, this, nullptr, false, true);
     bool subset_ligcaretlist = out->ligCaretList.serialize_subset (c, ligCaretList, this);
 
+    if (subset_varstore && varstore_index != (unsigned) -1) {
+      c->serializer->repack_last(varstore_index);
+    }
+
     return_trace (subset_glyphclassdef || subset_attachlist ||
 		  subset_ligcaretlist || subset_markattachclassdef ||
 		  (out->version.to_int () >= 0x00010002u && subset_markglyphsetsdef) ||
@@ -884,14 +902,14 @@ struct GDEF
     default: return false;
     }
   }
-  const VariationStore &get_var_store () const
+  const ItemVariationStore &get_var_store () const
   {
     switch (u.version.major) {
-    case 1: return u.version.to_int () >= 0x00010003u ? this+u.version1.varStore : Null(VariationStore);
+    case 1: return u.version.to_int () >= 0x00010003u ? this+u.version1.varStore : Null(ItemVariationStore);
 #ifndef HB_NO_BEYOND_64K
     case 2: return this+u.version2.varStore;
 #endif
-    default: return Null(VariationStore);
+    default: return Null(ItemVariationStore);
     }
   }
 
@@ -1011,9 +1029,9 @@ struct GDEF
 				       hb_hashmap_t<unsigned, hb_pair_t<unsigned, int>> *layout_variation_idx_delta_map /* OUT */) const
   {
     if (!has_var_store ()) return;
-    const VariationStore &var_store = get_var_store ();
+    const ItemVariationStore &var_store = get_var_store ();
     float *store_cache = var_store.create_cache ();
-    
+
     unsigned new_major = 0, new_minor = 0;
     unsigned last_major = (layout_variation_indices->get_min ()) >> 16;
     for (unsigned idx : layout_variation_indices->iter ())
diff --git a/thirdparty/harfbuzz/src/OT/Layout/GPOS/PairPosFormat2.hh b/thirdparty/harfbuzz/src/OT/Layout/GPOS/PairPosFormat2.hh
index dd02da887d..9c805b39a1 100644
--- a/thirdparty/harfbuzz/src/OT/Layout/GPOS/PairPosFormat2.hh
+++ b/thirdparty/harfbuzz/src/OT/Layout/GPOS/PairPosFormat2.hh
@@ -324,17 +324,8 @@ struct PairPosFormat2_4 : ValueBase
       }
     }
 
-    const hb_set_t &glyphset = *c->plan->glyphset_gsub ();
-    const hb_map_t &glyph_map = *c->plan->glyph_map;
-
-    auto it =
-    + hb_iter (this+coverage)
-    | hb_filter (glyphset)
-    | hb_map_retains_sorting (glyph_map)
-    ;
-
-    out->coverage.serialize_serialize (c->serializer, it);
-    return_trace (out->class1Count && out->class2Count && bool (it));
+    bool ret = out->coverage.serialize_subset(c, coverage, this);
+    return_trace (out->class1Count && out->class2Count && ret);
   }
 
 
diff --git a/thirdparty/harfbuzz/src/OT/Layout/GPOS/ValueFormat.hh b/thirdparty/harfbuzz/src/OT/Layout/GPOS/ValueFormat.hh
index 17f57db1f5..9442cc1cc5 100644
--- a/thirdparty/harfbuzz/src/OT/Layout/GPOS/ValueFormat.hh
+++ b/thirdparty/harfbuzz/src/OT/Layout/GPOS/ValueFormat.hh
@@ -116,7 +116,7 @@ struct ValueFormat : HBUINT16
 
     if (!use_x_device && !use_y_device) return ret;
 
-    const VariationStore &store = c->var_store;
+    const ItemVariationStore &store = c->var_store;
     auto *cache = c->var_store_cache;
 
     /* pixel -> fractional pixel */
diff --git a/thirdparty/harfbuzz/src/OT/glyf/CompositeGlyph.hh b/thirdparty/harfbuzz/src/OT/glyf/CompositeGlyph.hh
index 60858a5a58..5c0ecd5133 100644
--- a/thirdparty/harfbuzz/src/OT/glyf/CompositeGlyph.hh
+++ b/thirdparty/harfbuzz/src/OT/glyf/CompositeGlyph.hh
@@ -240,7 +240,8 @@ struct CompositeGlyphRecord
     }
     if (is_anchored ()) tx = ty = 0;
 
-    trans.init ((float) tx, (float) ty);
+    /* set is_end_point flag to true, used by IUP delta optimization */
+    trans.init ((float) tx, (float) ty, true);
 
     {
       const F2DOT14 *points = (const F2DOT14 *) p;
diff --git a/thirdparty/harfbuzz/src/OT/glyf/Glyph.hh b/thirdparty/harfbuzz/src/OT/glyf/Glyph.hh
index 5ea611948f..69a0b625c7 100644
--- a/thirdparty/harfbuzz/src/OT/glyf/Glyph.hh
+++ b/thirdparty/harfbuzz/src/OT/glyf/Glyph.hh
@@ -103,6 +103,9 @@ struct Glyph
     }
   }
 
+  bool is_composite () const
+  { return type == COMPOSITE; }
+
   bool get_all_points_without_var (const hb_face_t *face,
                                    contour_point_vector_t &points /* OUT */) const
   {
diff --git a/thirdparty/harfbuzz/src/OT/glyf/glyf-helpers.hh b/thirdparty/harfbuzz/src/OT/glyf/glyf-helpers.hh
index d0a5a132f0..f157bf0020 100644
--- a/thirdparty/harfbuzz/src/OT/glyf/glyf-helpers.hh
+++ b/thirdparty/harfbuzz/src/OT/glyf/glyf-helpers.hh
@@ -38,7 +38,7 @@ _write_loca (IteratorIn&& it,
 
     unsigned padded_size = *it++;
     offset += padded_size;
-    DEBUG_MSG (SUBSET, nullptr, "loca entry gid %u offset %u padded-size %u", gid, offset, padded_size);
+    DEBUG_MSG (SUBSET, nullptr, "loca entry gid %" PRIu32 " offset %u padded-size %u", gid, offset, padded_size);
     value = offset >> right_shift;
     *dest++ = value;
 
diff --git a/thirdparty/harfbuzz/src/graph/classdef-graph.hh b/thirdparty/harfbuzz/src/graph/classdef-graph.hh
index 9cf845a82d..da6378820b 100644
--- a/thirdparty/harfbuzz/src/graph/classdef-graph.hh
+++ b/thirdparty/harfbuzz/src/graph/classdef-graph.hh
@@ -134,20 +134,23 @@ struct ClassDef : public OT::ClassDef
 
 struct class_def_size_estimator_t
 {
+  // TODO(garretrieger): update to support beyond64k coverage/classdef tables.
+  constexpr static unsigned class_def_format1_base_size = 6;
+  constexpr static unsigned class_def_format2_base_size = 4;
+  constexpr static unsigned coverage_base_size = 4;
+  constexpr static unsigned bytes_per_range = 6;
+  constexpr static unsigned bytes_per_glyph = 2;
+
   template<typename It>
   class_def_size_estimator_t (It glyph_and_class)
-      : gids_consecutive (true), num_ranges_per_class (), glyphs_per_class ()
+      : num_ranges_per_class (), glyphs_per_class ()
   {
-    unsigned last_gid = (unsigned) -1;
+    reset();
     for (auto p : + glyph_and_class)
     {
       unsigned gid = p.first;
       unsigned klass = p.second;
 
-      if (last_gid != (unsigned) -1 && gid != last_gid + 1)
-        gids_consecutive = false;
-      last_gid = gid;
-
       hb_set_t* glyphs;
       if (glyphs_per_class.has (klass, &glyphs) && glyphs) {
         glyphs->add (gid);
@@ -177,28 +180,54 @@ struct class_def_size_estimator_t
     }
   }
 
-  // Incremental increase in the Coverage and ClassDef table size
-  // (worst case) if all glyphs associated with 'klass' were added.
-  unsigned incremental_coverage_size (unsigned klass) const
+  void reset() {
+    class_def_1_size = class_def_format1_base_size;
+    class_def_2_size = class_def_format2_base_size;
+    included_glyphs.clear();
+    included_classes.clear();
+  }
+
+  // Compute the size of coverage for all glyphs added via 'add_class_def_size'.
+  unsigned coverage_size () const
   {
-    // Coverage takes 2 bytes per glyph worst case,
-    return 2 * glyphs_per_class.get (klass).get_population ();
+    unsigned format1_size = coverage_base_size + bytes_per_glyph * included_glyphs.get_population();
+    unsigned format2_size = coverage_base_size + bytes_per_range * num_glyph_ranges();
+    return hb_min(format1_size, format2_size);
   }
 
-  // Incremental increase in the Coverage and ClassDef table size
-  // (worst case) if all glyphs associated with 'klass' were added.
-  unsigned incremental_class_def_size (unsigned klass) const
+  // Compute the new size of the ClassDef table if all glyphs associated with 'klass' were added.
+  unsigned add_class_def_size (unsigned klass)
   {
-    // ClassDef takes 6 bytes per range
-    unsigned class_def_2_size = 6 * num_ranges_per_class.get (klass);
-    if (gids_consecutive)
-    {
-      // ClassDef1 takes 2 bytes per glyph, but only can be used
-      // when gids are consecutive.
-      return hb_min (2 * glyphs_per_class.get (klass).get_population (), class_def_2_size);
+    if (!included_classes.has(klass)) {
+      hb_set_t* glyphs = nullptr;
+      if (glyphs_per_class.has(klass, &glyphs)) {
+        included_glyphs.union_(*glyphs);
+      }
+
+      class_def_1_size = class_def_format1_base_size;
+      if (!included_glyphs.is_empty()) {
+        unsigned min_glyph = included_glyphs.get_min();
+        unsigned max_glyph = included_glyphs.get_max();
+        class_def_1_size += bytes_per_glyph * (max_glyph - min_glyph + 1);
+      }
+
+      class_def_2_size += bytes_per_range * num_ranges_per_class.get (klass);
+
+      included_classes.add(klass);
     }
 
-    return class_def_2_size;
+    return hb_min (class_def_1_size, class_def_2_size);
+  }
+
+  unsigned num_glyph_ranges() const {
+    hb_codepoint_t start = HB_SET_VALUE_INVALID;
+    hb_codepoint_t end = HB_SET_VALUE_INVALID;
+
+    unsigned count = 0;
+    while (included_glyphs.next_range (&start, &end)) {
+        count++;
+    }
+    return count;
   }
 
   bool in_error ()
@@ -214,9 +243,12 @@ struct class_def_size_estimator_t
   }
 
  private:
-  bool gids_consecutive;
   hb_hashmap_t<unsigned, unsigned> num_ranges_per_class;
   hb_hashmap_t<unsigned, hb_set_t> glyphs_per_class;
+  hb_set_t included_classes;
+  hb_set_t included_glyphs;
+  unsigned class_def_1_size;
+  unsigned class_def_2_size;
 };
 
 
diff --git a/thirdparty/harfbuzz/src/graph/graph.hh b/thirdparty/harfbuzz/src/graph/graph.hh
index 26ad00bdd9..2a9d8346c0 100644
--- a/thirdparty/harfbuzz/src/graph/graph.hh
+++ b/thirdparty/harfbuzz/src/graph/graph.hh
@@ -195,6 +195,15 @@ struct graph_t
       return incoming_edges_;
     }
 
+    unsigned incoming_edges_from_parent (unsigned parent_index) const {
+      if (single_parent != (unsigned) -1) {
+        return single_parent == parent_index ? 1 : 0;
+      }
+
+      unsigned* count;
+      return  parents.has(parent_index, &count) ? *count : 0;
+    }
+
     void reset_parents ()
     {
       incoming_edges_ = 0;
@@ -334,6 +343,16 @@ struct graph_t
       return true;
     }
 
+    bool give_max_priority ()
+    {
+      bool result = false;
+      while (!has_max_priority()) {
+        result = true;
+        priority++;
+      }
+      return result;
+    }
+
     bool has_max_priority () const {
       return priority >= 3;
     }
@@ -1023,6 +1042,11 @@ struct graph_t
    * Creates a copy of child and re-assigns the link from
    * parent to the clone. The copy is a shallow copy, objects
    * linked from child are not duplicated.
+   *
+   * Returns the index of the newly created duplicate.
+   *
+   * If the child_idx only has incoming edges from parent_idx, this
+   * will do nothing and return the original child_idx.
    */
   unsigned duplicate_if_shared (unsigned parent_idx, unsigned child_idx)
   {
@@ -1036,18 +1060,20 @@ struct graph_t
    * Creates a copy of child and re-assigns the link from
    * parent to the clone. The copy is a shallow copy, objects
    * linked from child are not duplicated.
+   *
+   * Returns the index of the newly created duplicate.
+   *
+   * If the child_idx only has incoming edges from parent_idx,
+   * duplication isn't possible and this will return -1.
    */
   unsigned duplicate (unsigned parent_idx, unsigned child_idx)
   {
     update_parents ();
 
-    unsigned links_to_child = 0;
-    for (const auto& l : vertices_[parent_idx].obj.all_links ())
-    {
-      if (l.objidx == child_idx) links_to_child++;
-    }
+    const auto& child = vertices_[child_idx];
+    unsigned links_to_child = child.incoming_edges_from_parent(parent_idx);
 
-    if (vertices_[child_idx].incoming_edges () <= links_to_child)
+    if (child.incoming_edges () <= links_to_child)
     {
       // Can't duplicate this node, doing so would orphan the original one as all remaining links
       // to child are from parent.
@@ -1060,7 +1086,7 @@ struct graph_t
                parent_idx, child_idx);
 
     unsigned clone_idx = duplicate (child_idx);
-    if (clone_idx == (unsigned) -1) return false;
+    if (clone_idx == (unsigned) -1) return -1;
     // duplicate shifts the root node idx, so if parent_idx was root update it.
     if (parent_idx == clone_idx) parent_idx++;
 
@@ -1076,6 +1102,62 @@ struct graph_t
     return clone_idx;
   }
 
+  /*
+   * Creates a copy of child and re-assigns the links from
+   * parents to the clone. The copy is a shallow copy, objects
+   * linked from child are not duplicated.
+   *
+   * Returns the index of the newly created duplicate.
+   *
+   * If the child_idx only has incoming edges from parents,
+   * duplication isn't possible or duplication fails and this will
+   * return -1.
+   */
+  unsigned duplicate (const hb_set_t* parents, unsigned child_idx)
+  {
+    if (parents->is_empty()) {
+      return -1;
+    }
+
+    update_parents ();
+
+    const auto& child = vertices_[child_idx];
+    unsigned links_to_child = 0;
+    unsigned last_parent = parents->get_max();
+    unsigned first_parent = parents->get_min();
+    for (unsigned parent_idx : *parents) {
+      links_to_child += child.incoming_edges_from_parent(parent_idx);
+    }
+
+    if (child.incoming_edges () <= links_to_child)
+    {
+      // Can't duplicate this node, doing so would orphan the original one as all remaining links
+      // to child are from parent.
+      DEBUG_MSG (SUBSET_REPACK, nullptr, "  Not duplicating %u, ..., %u => %u", first_parent, last_parent, child_idx);
+      return -1;
+    }
+
+    DEBUG_MSG (SUBSET_REPACK, nullptr, "  Duplicating %u, ..., %u => %u", first_parent, last_parent, child_idx);
+
+    unsigned clone_idx = duplicate (child_idx);
+    if (clone_idx == (unsigned) -1) return false;
+    
+    for (unsigned parent_idx : *parents) {
+      // duplicate shifts the root node idx, so if parent_idx was root update it.
+      if (parent_idx == clone_idx) parent_idx++;
+      auto& parent = vertices_[parent_idx];
+      for (auto& l : parent.obj.all_links_writer ())
+      {
+        if (l.objidx != child_idx)
+          continue;
+
+        reassign_link (l, parent_idx, clone_idx);
+      }
+    }
+
+    return clone_idx;
+  }
+
 
   /*
    * Adds a new node to the graph, not connected to anything.
diff --git a/thirdparty/harfbuzz/src/graph/pairpos-graph.hh b/thirdparty/harfbuzz/src/graph/pairpos-graph.hh
index f7f74b18c9..fd46861de4 100644
--- a/thirdparty/harfbuzz/src/graph/pairpos-graph.hh
+++ b/thirdparty/harfbuzz/src/graph/pairpos-graph.hh
@@ -247,8 +247,8 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
     for (unsigned i = 0; i < class1_count; i++)
     {
       unsigned accumulated_delta = class1_record_size;
-      coverage_size += estimator.incremental_coverage_size (i);
-      class_def_1_size += estimator.incremental_class_def_size (i);
+      class_def_1_size = estimator.add_class_def_size (i);
+      coverage_size = estimator.coverage_size ();
       max_coverage_size = hb_max (max_coverage_size, coverage_size);
       max_class_def_1_size = hb_max (max_class_def_1_size, class_def_1_size);
 
@@ -280,8 +280,10 @@ struct PairPosFormat2 : public OT::Layout::GPOS_impl::PairPosFormat2_4<SmallType
         split_points.push (i);
         // split does not include i, so add the size for i when we reset the size counters.
         accumulated = base_size + accumulated_delta;
-        coverage_size = 4 + estimator.incremental_coverage_size (i);
-        class_def_1_size = 4 + estimator.incremental_class_def_size (i);
+
+        estimator.reset();
+        class_def_1_size = estimator.add_class_def_size(i);
+        coverage_size = estimator.coverage_size();
         visited.clear (); // node sharing isn't allowed between splits.
       }
     }
diff --git a/thirdparty/harfbuzz/src/hb-aat-layout-morx-table.hh b/thirdparty/harfbuzz/src/hb-aat-layout-morx-table.hh
index 06c9334b37..8436551324 100644
--- a/thirdparty/harfbuzz/src/hb-aat-layout-morx-table.hh
+++ b/thirdparty/harfbuzz/src/hb-aat-layout-morx-table.hh
@@ -552,6 +552,7 @@ struct LigatureSubtable
 	    {
 	      DEBUG_MSG (APPLY, nullptr, "Skipping ligature component");
 	      if (unlikely (!buffer->move_to (match_positions[--match_length % ARRAY_LENGTH (match_positions)]))) return;
+	      buffer->cur().unicode_props() |= UPROPS_MASK_IGNORABLE;
 	      if (unlikely (!buffer->replace_glyph (DELETED_GLYPH))) return;
 	    }
 
diff --git a/thirdparty/harfbuzz/src/hb-algs.hh b/thirdparty/harfbuzz/src/hb-algs.hh
index ea97057165..efa6074a42 100644
--- a/thirdparty/harfbuzz/src/hb-algs.hh
+++ b/thirdparty/harfbuzz/src/hb-algs.hh
@@ -671,7 +671,7 @@ struct hb_pair_t
     return 0;
   }
 
-  friend void swap (hb_pair_t& a, hb_pair_t& b)
+  friend void swap (hb_pair_t& a, hb_pair_t& b) noexcept
   {
     hb_swap (a.first, b.first);
     hb_swap (a.second, b.second);
@@ -1053,6 +1053,18 @@ _hb_cmp_method (const void *pkey, const void *pval, Ts... ds)
   return val.cmp (key, ds...);
 }
 
+template <typename K, typename V>
+static int
+_hb_cmp_operator (const void *pkey, const void *pval)
+{
+  const K& key = * (const K*) pkey;
+  const V& val = * (const V*) pval;
+
+  if (key < val) return -1;
+  if (key > val) return  1;
+  return 0;
+}
+
 template <typename V, typename K, typename ...Ts>
 static inline bool
 hb_bsearch_impl (unsigned *pos, /* Out */
diff --git a/thirdparty/harfbuzz/src/hb-bit-set-invertible.hh b/thirdparty/harfbuzz/src/hb-bit-set-invertible.hh
index 2626251807..d5d1326d9f 100644
--- a/thirdparty/harfbuzz/src/hb-bit-set-invertible.hh
+++ b/thirdparty/harfbuzz/src/hb-bit-set-invertible.hh
@@ -39,10 +39,10 @@ struct hb_bit_set_invertible_t
 
   hb_bit_set_invertible_t () = default;
   hb_bit_set_invertible_t (const hb_bit_set_invertible_t& o) = default;
-  hb_bit_set_invertible_t (hb_bit_set_invertible_t&& other) : hb_bit_set_invertible_t () { hb_swap (*this, other); }
+  hb_bit_set_invertible_t (hb_bit_set_invertible_t&& other)  noexcept : hb_bit_set_invertible_t () { hb_swap (*this, other); }
   hb_bit_set_invertible_t& operator= (const hb_bit_set_invertible_t& o) = default;
-  hb_bit_set_invertible_t& operator= (hb_bit_set_invertible_t&& other) { hb_swap (*this, other); return *this; }
-  friend void swap (hb_bit_set_invertible_t &a, hb_bit_set_invertible_t &b)
+  hb_bit_set_invertible_t& operator= (hb_bit_set_invertible_t&& other)  noexcept { hb_swap (*this, other); return *this; }
+  friend void swap (hb_bit_set_invertible_t &a, hb_bit_set_invertible_t &b) noexcept
   {
     if (likely (!a.s.successful || !b.s.successful))
       return;
diff --git a/thirdparty/harfbuzz/src/hb-bit-set.hh b/thirdparty/harfbuzz/src/hb-bit-set.hh
index 1dbcce5cbd..5f4c6f0afe 100644
--- a/thirdparty/harfbuzz/src/hb-bit-set.hh
+++ b/thirdparty/harfbuzz/src/hb-bit-set.hh
@@ -38,10 +38,10 @@ struct hb_bit_set_t
   ~hb_bit_set_t () = default;
 
   hb_bit_set_t (const hb_bit_set_t& other) : hb_bit_set_t () { set (other, true); }
-  hb_bit_set_t ( hb_bit_set_t&& other) : hb_bit_set_t () { hb_swap (*this, other); }
+  hb_bit_set_t ( hb_bit_set_t&& other)  noexcept : hb_bit_set_t () { hb_swap (*this, other); }
   hb_bit_set_t& operator= (const hb_bit_set_t& other) { set (other); return *this; }
-  hb_bit_set_t& operator= (hb_bit_set_t&& other) { hb_swap (*this, other); return *this; }
-  friend void swap (hb_bit_set_t &a, hb_bit_set_t &b)
+  hb_bit_set_t& operator= (hb_bit_set_t&& other)  noexcept { hb_swap (*this, other); return *this; }
+  friend void swap (hb_bit_set_t &a, hb_bit_set_t &b) noexcept
   {
     if (likely (!a.successful || !b.successful))
       return;
diff --git a/thirdparty/harfbuzz/src/hb-blob.cc b/thirdparty/harfbuzz/src/hb-blob.cc
index 265effba03..873d9b257a 100644
--- a/thirdparty/harfbuzz/src/hb-blob.cc
+++ b/thirdparty/harfbuzz/src/hb-blob.cc
@@ -598,6 +598,11 @@ _open_resource_fork (const char *file_name, hb_mapped_file_t *file)
  * Creates a new blob containing the data from the
  * specified binary font file.
  *
+ * The filename is passed directly to the system on all platforms,
+ * except on Windows, where the filename is interpreted as UTF-8.
+ * Only if the filename is not valid UTF-8, it will be interpreted
+ * according to the system codepage.
+ *
  * Returns: An #hb_blob_t pointer with the content of the file,
  * or hb_blob_get_empty() if failed.
  *
@@ -617,6 +622,11 @@ hb_blob_create_from_file (const char *file_name)
  * Creates a new blob containing the data from the
  * specified binary font file.
  *
+ * The filename is passed directly to the system on all platforms,
+ * except on Windows, where the filename is interpreted as UTF-8.
+ * Only if the filename is not valid UTF-8, it will be interpreted
+ * according to the system codepage.
+ *
  * Returns: An #hb_blob_t pointer with the content of the file,
  * or `NULL` if failed.
  *
@@ -672,10 +682,19 @@ fail_without_close:
   if (unlikely (!file)) return nullptr;
 
   HANDLE fd;
+  int conversion;
   unsigned int size = strlen (file_name) + 1;
   wchar_t * wchar_file_name = (wchar_t *) hb_malloc (sizeof (wchar_t) * size);
   if (unlikely (!wchar_file_name)) goto fail_without_close;
-  mbstowcs (wchar_file_name, file_name, size);
+
+  /* Assume file name is given in UTF-8 encoding */
+  conversion = MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, file_name, -1, wchar_file_name, size);
+  if (conversion <= 0)
+  {
+    /* Conversion failed due to invalid UTF-8 characters,
+       Repeat conversion based on system code page */
+    mbstowcs(wchar_file_name, file_name, size);
+  }
 #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
   {
     CREATEFILE2_EXTENDED_PARAMETERS ceparams = { 0 };
diff --git a/thirdparty/harfbuzz/src/hb-buffer-verify.cc b/thirdparty/harfbuzz/src/hb-buffer-verify.cc
index 15a53919de..671d6eda8c 100644
--- a/thirdparty/harfbuzz/src/hb-buffer-verify.cc
+++ b/thirdparty/harfbuzz/src/hb-buffer-verify.cc
@@ -149,7 +149,7 @@ buffer_verify_unsafe_to_break (hb_buffer_t  *buffer,
     }
     assert (text_start < text_end);
 
-    if (0)
+    if (false)
       printf("start %u end %u text start %u end %u\n", start, end, text_start, text_end);
 
     hb_buffer_clear_contents (fragment);
@@ -288,7 +288,7 @@ buffer_verify_unsafe_to_concat (hb_buffer_t        *buffer,
       }
       assert (text_start < text_end);
 
-      if (0)
+      if (false)
 	printf("start %u end %u text start %u end %u\n", start, end, text_start, text_end);
 
 #if 0
diff --git a/thirdparty/harfbuzz/src/hb-buffer.cc b/thirdparty/harfbuzz/src/hb-buffer.cc
index 934c6c2129..d621a7cc55 100644
--- a/thirdparty/harfbuzz/src/hb-buffer.cc
+++ b/thirdparty/harfbuzz/src/hb-buffer.cc
@@ -309,6 +309,7 @@ hb_buffer_t::clear ()
 
   deallocate_var_all ();
   serial = 0;
+  random_state = 1;
   scratch_flags = HB_BUFFER_SCRATCH_FLAG_DEFAULT;
 }
 
@@ -1359,6 +1360,49 @@ hb_buffer_get_not_found_glyph (const hb_buffer_t *buffer)
   return buffer->not_found;
 }
 
+/**
+ * hb_buffer_set_random_state:
+ * @buffer: An #hb_buffer_t
+ * @state: the new random state
+ *
+ * Sets the random state of the buffer. The state changes
+ * every time a glyph uses randomness (eg. the `rand`
+ * OpenType feature). This function together with
+ * hb_buffer_get_random_state() allow for transferring
+ * the current random state to a subsequent buffer, to
+ * get better randomness distribution.
+ *
+ * Defaults to 1 and when buffer contents are cleared.
+ * A value of 0 disables randomness during shaping.
+ *
+ * Since: 8.4.0
+ **/
+void
+hb_buffer_set_random_state (hb_buffer_t    *buffer,
+			    unsigned        state)
+{
+  if (unlikely (hb_object_is_immutable (buffer)))
+    return;
+
+  buffer->random_state = state;
+}
+
+/**
+ * hb_buffer_get_random_state:
+ * @buffer: An #hb_buffer_t
+ *
+ * See hb_buffer_set_random_state().
+ *
+ * Return value:
+ * The @buffer random state
+ *
+ * Since: 8.4.0
+ **/
+unsigned
+hb_buffer_get_random_state (const hb_buffer_t *buffer)
+{
+  return buffer->random_state;
+}
 
 /**
  * hb_buffer_clear_contents:
diff --git a/thirdparty/harfbuzz/src/hb-buffer.h b/thirdparty/harfbuzz/src/hb-buffer.h
index 3573127ff0..f75fe96b21 100644
--- a/thirdparty/harfbuzz/src/hb-buffer.h
+++ b/thirdparty/harfbuzz/src/hb-buffer.h
@@ -487,6 +487,12 @@ hb_buffer_set_not_found_glyph (hb_buffer_t    *buffer,
 HB_EXTERN hb_codepoint_t
 hb_buffer_get_not_found_glyph (const hb_buffer_t *buffer);
 
+HB_EXTERN void
+hb_buffer_set_random_state (hb_buffer_t    *buffer,
+			    unsigned        state);
+
+HB_EXTERN unsigned
+hb_buffer_get_random_state (const hb_buffer_t *buffer);
 
 /*
  * Content API.
diff --git a/thirdparty/harfbuzz/src/hb-buffer.hh b/thirdparty/harfbuzz/src/hb-buffer.hh
index f04ad58f11..0a198722d6 100644
--- a/thirdparty/harfbuzz/src/hb-buffer.hh
+++ b/thirdparty/harfbuzz/src/hb-buffer.hh
@@ -116,6 +116,7 @@ struct hb_buffer_t
 
   uint8_t allocated_var_bits;
   uint8_t serial;
+  uint32_t random_state;
   hb_buffer_scratch_flags_t scratch_flags; /* Have space-fallback, etc. */
   unsigned int max_len; /* Maximum allowed len. */
   int max_ops; /* Maximum allowed operations. */
diff --git a/thirdparty/harfbuzz/src/hb-cff-interp-dict-common.hh b/thirdparty/harfbuzz/src/hb-cff-interp-dict-common.hh
index 53226b227e..a08b10b5ff 100644
--- a/thirdparty/harfbuzz/src/hb-cff-interp-dict-common.hh
+++ b/thirdparty/harfbuzz/src/hb-cff-interp-dict-common.hh
@@ -54,8 +54,8 @@ struct top_dict_values_t : dict_values_t<OPSTR>
   }
   void fini () { dict_values_t<OPSTR>::fini (); }
 
-  unsigned int  charStringsOffset;
-  unsigned int  FDArrayOffset;
+  int  charStringsOffset;
+  int  FDArrayOffset;
 };
 
 struct dict_opset_t : opset_t<number_t>
@@ -157,11 +157,11 @@ struct top_dict_opset_t : dict_opset_t
   {
     switch (op) {
       case OpCode_CharStrings:
-	dictval.charStringsOffset = env.argStack.pop_uint ();
+	dictval.charStringsOffset = env.argStack.pop_int ();
 	env.clear_args ();
 	break;
       case OpCode_FDArray:
-	dictval.FDArrayOffset = env.argStack.pop_uint ();
+	dictval.FDArrayOffset = env.argStack.pop_int ();
 	env.clear_args ();
 	break;
       case OpCode_FontMatrix:
diff --git a/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh b/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh
index 915b10cf39..55b1d3bf8d 100644
--- a/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh
+++ b/thirdparty/harfbuzz/src/hb-cff2-interp-cs.hh
@@ -168,7 +168,7 @@ struct cff2_cs_interp_env_t : cs_interp_env_t<ELEM, CFF2Subrs>
   protected:
   const int     *coords;
   unsigned int  num_coords;
-  const	 CFF2VariationStore *varStore;
+  const	 CFF2ItemVariationStore *varStore;
   unsigned int  region_count;
   unsigned int  ivs;
   hb_vector_t<float>  scalars;
diff --git a/thirdparty/harfbuzz/src/hb-common.cc b/thirdparty/harfbuzz/src/hb-common.cc
index 0c13c7d171..4b8bae4422 100644
--- a/thirdparty/harfbuzz/src/hb-common.cc
+++ b/thirdparty/harfbuzz/src/hb-common.cc
@@ -996,7 +996,7 @@ hb_feature_to_string (hb_feature_t *feature,
   if (feature->value > 1)
   {
     s[len++] = '=';
-    len += hb_max (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%u", feature->value));
+    len += hb_max (0, snprintf (s + len, ARRAY_LENGTH (s) - len, "%" PRIu32, feature->value));
   }
   assert (len < ARRAY_LENGTH (s));
   len = hb_min (len, size - 1);
diff --git a/thirdparty/harfbuzz/src/hb-common.h b/thirdparty/harfbuzz/src/hb-common.h
index a9fe666b39..533de91562 100644
--- a/thirdparty/harfbuzz/src/hb-common.h
+++ b/thirdparty/harfbuzz/src/hb-common.h
@@ -47,14 +47,10 @@
 # endif /* !__cplusplus */
 #endif
 
-#if defined (_SVR4) || defined (SVR4) || defined (__OpenBSD__) || \
-    defined (_sgi) || defined (__sun) || defined (sun) || \
-    defined (__digital__) || defined (__HP_cc)
-#  include <inttypes.h>
-#elif defined (_AIX)
+#if defined (_AIX)
 #  include <sys/inttypes.h>
 #elif defined (_MSC_VER) && _MSC_VER < 1600
-/* VS 2010 (_MSC_VER 1600) has stdint.h */
+/* VS 2010 (_MSC_VER 1600) has stdint.h   */
 typedef __int8 int8_t;
 typedef unsigned __int8 uint8_t;
 typedef __int16 int16_t;
@@ -63,10 +59,11 @@ typedef __int32 int32_t;
 typedef unsigned __int32 uint32_t;
 typedef __int64 int64_t;
 typedef unsigned __int64 uint64_t;
-#elif defined (__KERNEL__)
-#  include <linux/types.h>
-#else
+#elif defined (_MSC_VER) && _MSC_VER < 1800
+/* VS 2013 (_MSC_VER 1800) has inttypes.h */
 #  include <stdint.h>
+#else
+#  include <inttypes.h>
 #endif
 
 #if defined(__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))
diff --git a/thirdparty/harfbuzz/src/hb-cplusplus.hh b/thirdparty/harfbuzz/src/hb-cplusplus.hh
index 531ef1b7c8..a640e192de 100644
--- a/thirdparty/harfbuzz/src/hb-cplusplus.hh
+++ b/thirdparty/harfbuzz/src/hb-cplusplus.hh
@@ -56,15 +56,15 @@ struct shared_ptr
 
   explicit shared_ptr (T *p = nullptr) : p (p) {}
   shared_ptr (const shared_ptr &o) : p (v::reference (o.p)) {}
-  shared_ptr (shared_ptr &&o) : p (o.p) { o.p = nullptr; }
+  shared_ptr (shared_ptr &&o)  noexcept : p (o.p) { o.p = nullptr; }
   shared_ptr& operator = (const shared_ptr &o) { if (p != o.p) { destroy (); p = o.p; reference (); } return *this; }
-  shared_ptr& operator = (shared_ptr &&o) { v::destroy (p); p = o.p; o.p = nullptr; return *this; }
+  shared_ptr& operator = (shared_ptr &&o)  noexcept { v::destroy (p); p = o.p; o.p = nullptr; return *this; }
   ~shared_ptr () { v::destroy (p); p = nullptr; }
 
   T* get() const { return p; }
 
-  void swap (shared_ptr &o) { std::swap (p, o.p); }
-  friend void swap (shared_ptr &a, shared_ptr &b) { std::swap (a.p, b.p); }
+  void swap (shared_ptr &o)  noexcept { std::swap (p, o.p); }
+  friend void swap (shared_ptr &a, shared_ptr &b)  noexcept { std::swap (a.p, b.p); }
 
   operator T * () const { return p; }
   T& operator * () const { return *get (); }
@@ -98,16 +98,16 @@ struct unique_ptr
 
   explicit unique_ptr (T *p = nullptr) : p (p) {}
   unique_ptr (const unique_ptr &o) = delete;
-  unique_ptr (unique_ptr &&o) : p (o.p) { o.p = nullptr; }
+  unique_ptr (unique_ptr &&o)  noexcept : p (o.p) { o.p = nullptr; }
   unique_ptr& operator = (const unique_ptr &o) = delete;
-  unique_ptr& operator = (unique_ptr &&o) { v::destroy (p); p = o.p; o.p = nullptr; return *this; }
+  unique_ptr& operator = (unique_ptr &&o)  noexcept { v::destroy (p); p = o.p; o.p = nullptr; return *this; }
   ~unique_ptr () { v::destroy (p); p = nullptr; }
 
   T* get() const { return p; }
   T* release () { T* v = p; p = nullptr; return v; }
 
-  void swap (unique_ptr &o) { std::swap (p, o.p); }
-  friend void swap (unique_ptr &a, unique_ptr &b) { std::swap (a.p, b.p); }
+  void swap (unique_ptr &o)  noexcept { std::swap (p, o.p); }
+  friend void swap (unique_ptr &a, unique_ptr &b)  noexcept { std::swap (a.p, b.p); }
 
   operator T * () const { return p; }
   T& operator * () const { return *get (); }
diff --git a/thirdparty/harfbuzz/src/hb-directwrite.cc b/thirdparty/harfbuzz/src/hb-directwrite.cc
index 42764a244b..6c90265d0b 100644
--- a/thirdparty/harfbuzz/src/hb-directwrite.cc
+++ b/thirdparty/harfbuzz/src/hb-directwrite.cc
@@ -173,7 +173,7 @@ _hb_directwrite_shaper_face_data_create (hb_face_t *face)
 
   t_DWriteCreateFactory p_DWriteCreateFactory;
 
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wcast-function-type"
 #endif
@@ -181,7 +181,7 @@ _hb_directwrite_shaper_face_data_create (hb_face_t *face)
   p_DWriteCreateFactory = (t_DWriteCreateFactory)
 			  GetProcAddress (data->dwrite_dll, "DWriteCreateFactory");
 
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__clang__)
 #pragma GCC diagnostic pop
 #endif
 
diff --git a/thirdparty/harfbuzz/src/hb-features.h b/thirdparty/harfbuzz/src/hb-features.h
new file mode 100644
index 0000000000..9199864195
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-features.h
@@ -0,0 +1,119 @@
+/*
+ * Copyright © 2022 Red Hat, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_FEATURES_H
+#define HB_FEATURES_H
+
+HB_BEGIN_DECLS
+
+/**
+ * SECTION: hb-features
+ * @title: hb-features
+ * @short_description: Feature detection
+ * @include: hb-features.h
+ *
+ * Macros for detecting optional HarfBuzz features at build time.
+ **/
+
+/**
+ * HB_HAS_CAIRO:
+ *
+ * Defined if Harfbuzz has been built with cairo support.
+ */
+#
+
+/**
+ * HB_HAS_CORETEXT:
+ *
+ * Defined if Harfbuzz has been built with CoreText support.
+ */
+#undef HB_HAS_CORETEXT
+
+/**
+ * HB_HAS_DIRECTWRITE:
+ *
+ * Defined if Harfbuzz has been built with DirectWrite support.
+ */
+#undef HB_HAS_DIRECTWRITE
+
+/**
+ * HB_HAS_FREETYPE:
+ *
+ * Defined if Harfbuzz has been built with Freetype support.
+ */
+#define HB_HAS_FREETYPE 1
+
+/**
+ * HB_HAS_GDI:
+ *
+ * Defined if Harfbuzz has been built with GDI support.
+ */
+#undef HB_HAS_GDI
+
+/**
+ * HB_HAS_GLIB:
+ *
+ * Defined if Harfbuzz has been built with GLib support.
+ */
+#define HB_HAS_GLIB 1
+
+/**
+ * HB_HAS_GOBJECT:
+ *
+ * Defined if Harfbuzz has been built with GObject support.
+ */
+#undef HB_HAS_GOBJECT
+
+/**
+ * HB_HAS_GRAPHITE:
+ *
+ * Defined if Harfbuzz has been built with Graphite support.
+ */
+#undef HB_HAS_GRAPHITE
+
+/**
+ * HB_HAS_ICU:
+ *
+ * Defined if Harfbuzz has been built with ICU support.
+ */
+#undef HB_HAS_ICU
+
+/**
+ * HB_HAS_UNISCRIBE:
+ *
+ * Defined if Harfbuzz has been built with Uniscribe support.
+ */
+#undef HB_HAS_UNISCRIBE
+
+/**
+ * HB_HAS_WASM:
+ *
+ * Defined if Harfbuzz has been built with WebAssembly support.
+ */
+#undef HB_HAS_WASM
+
+
+HB_END_DECLS
+
+#endif /* HB_FEATURES_H */
diff --git a/thirdparty/harfbuzz/src/hb-font.hh b/thirdparty/harfbuzz/src/hb-font.hh
index f503575c34..4c8190b0dd 100644
--- a/thirdparty/harfbuzz/src/hb-font.hh
+++ b/thirdparty/harfbuzz/src/hb-font.hh
@@ -651,7 +651,7 @@ struct hb_font_t
   {
     if (get_glyph_name (glyph, s, size)) return;
 
-    if (size && snprintf (s, size, "gid%u", glyph) < 0)
+    if (size && snprintf (s, size, "gid%" PRIu32, glyph) < 0)
       *s = '\0';
   }
 
diff --git a/thirdparty/harfbuzz/src/hb-ft.cc b/thirdparty/harfbuzz/src/hb-ft.cc
index 955a9081e0..3de4a6d5d4 100644
--- a/thirdparty/harfbuzz/src/hb-ft.cc
+++ b/thirdparty/harfbuzz/src/hb-ft.cc
@@ -224,7 +224,7 @@ _hb_ft_hb_font_check_changed (hb_font_t *font,
  *
  * Sets the FT_Load_Glyph load flags for the specified #hb_font_t.
  *
- * For more information, see 
+ * For more information, see
  * <https://freetype.org/freetype2/docs/reference/ft2-glyph_retrieval.html#ft_load_xxx>
  *
  * This function works with #hb_font_t objects created by
@@ -252,7 +252,7 @@ hb_ft_font_set_load_flags (hb_font_t *font, int load_flags)
  *
  * Fetches the FT_Load_Glyph load flags of the specified #hb_font_t.
  *
- * For more information, see 
+ * For more information, see
  * <https://freetype.org/freetype2/docs/reference/ft2-glyph_retrieval.html#ft_load_xxx>
  *
  * This function works with #hb_font_t objects created by
@@ -1118,10 +1118,10 @@ _hb_ft_reference_table (hb_face_t *face HB_UNUSED, hb_tag_t tag, void *user_data
  * This variant of the function does not provide any life-cycle management.
  *
  * Most client programs should use hb_ft_face_create_referenced()
- * (or, perhaps, hb_ft_face_create_cached()) instead. 
+ * (or, perhaps, hb_ft_face_create_cached()) instead.
  *
  * If you know you have valid reasons not to use hb_ft_face_create_referenced(),
- * then it is the client program's responsibility to destroy @ft_face 
+ * then it is the client program's responsibility to destroy @ft_face
  * after the #hb_face_t face object has been destroyed.
  *
  * Return value: (transfer full): the new #hb_face_t face object
@@ -1215,7 +1215,7 @@ hb_ft_face_finalize (void *arg)
 hb_face_t *
 hb_ft_face_create_cached (FT_Face ft_face)
 {
-  if (unlikely (!ft_face->generic.data || ft_face->generic.finalizer != (FT_Generic_Finalizer) hb_ft_face_finalize))
+  if (unlikely (!ft_face->generic.data || ft_face->generic.finalizer != hb_ft_face_finalize))
   {
     if (ft_face->generic.finalizer)
       ft_face->generic.finalizer (ft_face);
@@ -1241,13 +1241,13 @@ hb_ft_face_create_cached (FT_Face ft_face)
  * This variant of the function does not provide any life-cycle management.
  *
  * Most client programs should use hb_ft_font_create_referenced()
- * instead. 
+ * instead.
  *
  * If you know you have valid reasons not to use hb_ft_font_create_referenced(),
- * then it is the client program's responsibility to destroy @ft_face 
+ * then it is the client program's responsibility to destroy @ft_face
  * after the #hb_font_t font object has been destroyed.
  *
- * HarfBuzz will use the @destroy callback on the #hb_font_t font object 
+ * HarfBuzz will use the @destroy callback on the #hb_font_t font object
  * if it is supplied when you use this function. However, even if @destroy
  * is provided, it is the client program's responsibility to destroy @ft_face,
  * and it is the client program's responsibility to ensure that @ft_face is
diff --git a/thirdparty/harfbuzz/src/hb-icu.cc b/thirdparty/harfbuzz/src/hb-icu.cc
index e46401f7a6..3707ec30f8 100644
--- a/thirdparty/harfbuzz/src/hb-icu.cc
+++ b/thirdparty/harfbuzz/src/hb-icu.cc
@@ -93,15 +93,16 @@ hb_icu_script_to_script (UScriptCode script)
 UScriptCode
 hb_icu_script_from_script (hb_script_t script)
 {
+  UScriptCode out = USCRIPT_INVALID_CODE;
+
   if (unlikely (script == HB_SCRIPT_INVALID))
-    return USCRIPT_INVALID_CODE;
+    return out;
 
-  unsigned int numScriptCode = 1 + u_getIntPropertyMaxValue (UCHAR_SCRIPT);
-  for (unsigned int i = 0; i < numScriptCode; i++)
-    if (unlikely (hb_icu_script_to_script ((UScriptCode) i) == script))
-      return (UScriptCode) i;
+  UErrorCode icu_err = U_ZERO_ERROR;
+  const unsigned char buf[5] = {HB_UNTAG (script), 0};
+  uscript_getCode ((const char *) buf, &out, 1, &icu_err);
 
-  return USCRIPT_UNKNOWN;
+  return out;
 }
 
 
diff --git a/thirdparty/harfbuzz/src/hb-limits.hh b/thirdparty/harfbuzz/src/hb-limits.hh
index 25c1e71e13..7efc893eae 100644
--- a/thirdparty/harfbuzz/src/hb-limits.hh
+++ b/thirdparty/harfbuzz/src/hb-limits.hh
@@ -106,7 +106,7 @@
 #endif
 
 #ifndef HB_COLRV1_MAX_EDGE_COUNT
-#define HB_COLRV1_MAX_EDGE_COUNT 65536
+#define HB_COLRV1_MAX_EDGE_COUNT 2048
 #endif
 
 
diff --git a/thirdparty/harfbuzz/src/hb-map.hh b/thirdparty/harfbuzz/src/hb-map.hh
index 45a02b830c..6521b1a41d 100644
--- a/thirdparty/harfbuzz/src/hb-map.hh
+++ b/thirdparty/harfbuzz/src/hb-map.hh
@@ -70,9 +70,9 @@ struct hb_hashmap_t
 
     alloc (o.population); hb_copy (o, *this);
   }
-  hb_hashmap_t (hb_hashmap_t&& o) : hb_hashmap_t () { hb_swap (*this, o); }
+  hb_hashmap_t (hb_hashmap_t&& o)  noexcept : hb_hashmap_t () { hb_swap (*this, o); }
   hb_hashmap_t& operator= (const hb_hashmap_t& o)  { reset (); alloc (o.population); hb_copy (o, *this); return *this; }
-  hb_hashmap_t& operator= (hb_hashmap_t&& o)  { hb_swap (*this, o); return *this; }
+  hb_hashmap_t& operator= (hb_hashmap_t&& o)   noexcept { hb_swap (*this, o); return *this; }
 
   hb_hashmap_t (std::initializer_list<hb_pair_t<K, V>> lst) : hb_hashmap_t ()
   {
@@ -137,26 +137,23 @@ struct hb_hashmap_t
   };
 
   hb_object_header_t header;
-  unsigned int successful : 1; /* Allocations successful */
-  unsigned int population : 31; /* Not including tombstones. */
+  bool successful; /* Allocations successful */
+  unsigned short max_chain_length;
+  unsigned int population; /* Not including tombstones. */
   unsigned int occupancy; /* Including tombstones. */
   unsigned int mask;
   unsigned int prime;
-  unsigned int max_chain_length;
   item_t *items;
 
-  friend void swap (hb_hashmap_t& a, hb_hashmap_t& b)
+  friend void swap (hb_hashmap_t& a, hb_hashmap_t& b) noexcept
   {
     if (unlikely (!a.successful || !b.successful))
       return;
-    unsigned tmp = a.population;
-    a.population = b.population;
-    b.population = tmp;
-    //hb_swap (a.population, b.population);
+    hb_swap (a.max_chain_length, b.max_chain_length);
+    hb_swap (a.population, b.population);
     hb_swap (a.occupancy, b.occupancy);
     hb_swap (a.mask, b.mask);
     hb_swap (a.prime, b.prime);
-    hb_swap (a.max_chain_length, b.max_chain_length);
     hb_swap (a.items, b.items);
   }
   void init ()
@@ -164,10 +161,10 @@ struct hb_hashmap_t
     hb_object_init (this);
 
     successful = true;
+    max_chain_length = 0;
     population = occupancy = 0;
     mask = 0;
     prime = 0;
-    max_chain_length = 0;
     items = nullptr;
   }
   void fini ()
@@ -558,7 +555,7 @@ struct hb_map_t : hb_hashmap_t<hb_codepoint_t,
   ~hb_map_t () = default;
   hb_map_t () : hashmap () {}
   hb_map_t (const hb_map_t &o) : hashmap ((hashmap &) o) {}
-  hb_map_t (hb_map_t &&o) : hashmap (std::move ((hashmap &) o)) {}
+  hb_map_t (hb_map_t &&o)  noexcept : hashmap (std::move ((hashmap &) o)) {}
   hb_map_t& operator= (const hb_map_t&) = default;
   hb_map_t& operator= (hb_map_t&&) = default;
   hb_map_t (std::initializer_list<hb_codepoint_pair_t> lst) : hashmap (lst) {}
diff --git a/thirdparty/harfbuzz/src/hb-object.hh b/thirdparty/harfbuzz/src/hb-object.hh
index e2c2c3394c..5cffe1666b 100644
--- a/thirdparty/harfbuzz/src/hb-object.hh
+++ b/thirdparty/harfbuzz/src/hb-object.hh
@@ -325,7 +325,7 @@ retry:
   hb_user_data_array_t *user_data = obj->header.user_data.get_acquire ();
   if (unlikely (!user_data))
   {
-    user_data = (hb_user_data_array_t *) hb_calloc (sizeof (hb_user_data_array_t), 1);
+    user_data = (hb_user_data_array_t *) hb_calloc (1, sizeof (hb_user_data_array_t));
     if (unlikely (!user_data))
       return false;
     user_data->init ();
diff --git a/thirdparty/harfbuzz/src/hb-open-type.hh b/thirdparty/harfbuzz/src/hb-open-type.hh
index 6967bca3d4..9c11f14344 100644
--- a/thirdparty/harfbuzz/src/hb-open-type.hh
+++ b/thirdparty/harfbuzz/src/hb-open-type.hh
@@ -985,6 +985,13 @@ struct SortedArrayOf : ArrayOf<Type, LenType>
     return_trace (ret);
   }
 
+  SortedArrayOf* copy (hb_serialize_context_t *c) const
+  {
+    TRACE_SERIALIZE (this);
+    SortedArrayOf* out = reinterpret_cast<SortedArrayOf *> (ArrayOf<Type, LenType>::copy (c));
+    return_trace (out);
+  }
+
   template <typename T>
   Type &bsearch (const T &x, Type &not_found = Crap (Type))
   { return *as_array ().bsearch (x, &not_found); }
diff --git a/thirdparty/harfbuzz/src/hb-ot-cff-common.hh b/thirdparty/harfbuzz/src/hb-ot-cff-common.hh
index 4fdba197ac..c7c3264c08 100644
--- a/thirdparty/harfbuzz/src/hb-ot-cff-common.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-cff-common.hh
@@ -41,10 +41,21 @@ using namespace OT;
 using objidx_t = hb_serialize_context_t::objidx_t;
 using whence_t = hb_serialize_context_t::whence_t;
 
-/* utility macro */
-template<typename Type>
-static inline const Type& StructAtOffsetOrNull (const void *P, unsigned int offset)
-{ return offset ? StructAtOffset<Type> (P, offset) : Null (Type); }
+/* CFF offsets can technically be negative */
+template<typename Type, typename ...Ts>
+static inline const Type& StructAtOffsetOrNull (const void *P, int offset, hb_sanitize_context_t &sc, Ts&&... ds)
+{
+  if (!offset) return Null (Type);
+
+  const char *p = (const char *) P + offset;
+  if (!sc.check_point (p)) return Null (Type);
+
+  const Type &obj = *reinterpret_cast<const Type *> (p);
+  if (!obj.sanitize (&sc, std::forward<Ts> (ds)...)) return Null (Type);
+
+  return obj;
+}
+
 
 struct code_pair_t
 {
diff --git a/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh b/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh
index c869e90554..1bbd463841 100644
--- a/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-cff1-table.hh
@@ -763,9 +763,9 @@ struct cff1_top_dict_values_t : top_dict_values_t<cff1_top_dict_val_t>
   unsigned int    ros_supplement;
   unsigned int    cidCount;
 
-  unsigned int    EncodingOffset;
-  unsigned int    CharsetOffset;
-  unsigned int    FDSelectOffset;
+  int             EncodingOffset;
+  int             CharsetOffset;
+  int             FDSelectOffset;
   table_info_t    privateDictInfo;
 };
 
@@ -821,24 +821,24 @@ struct cff1_top_dict_opset_t : top_dict_opset_t<cff1_top_dict_val_t>
 	break;
 
       case OpCode_Encoding:
-	dictval.EncodingOffset = env.argStack.pop_uint ();
+	dictval.EncodingOffset = env.argStack.pop_int ();
 	env.clear_args ();
 	if (unlikely (dictval.EncodingOffset == 0)) return;
 	break;
 
       case OpCode_charset:
-	dictval.CharsetOffset = env.argStack.pop_uint ();
+	dictval.CharsetOffset = env.argStack.pop_int ();
 	env.clear_args ();
 	if (unlikely (dictval.CharsetOffset == 0)) return;
 	break;
 
       case OpCode_FDSelect:
-	dictval.FDSelectOffset = env.argStack.pop_uint ();
+	dictval.FDSelectOffset = env.argStack.pop_int ();
 	env.clear_args ();
 	break;
 
       case OpCode_Private:
-	dictval.privateDictInfo.offset = env.argStack.pop_uint ();
+	dictval.privateDictInfo.offset = env.argStack.pop_int ();
 	dictval.privateDictInfo.size = env.argStack.pop_uint ();
 	env.clear_args ();
 	break;
@@ -913,7 +913,7 @@ struct cff1_private_dict_values_base_t : dict_values_t<VAL>
   }
   void fini () { dict_values_t<VAL>::fini (); }
 
-  unsigned int      subrsOffset;
+  int                 subrsOffset;
   const CFF1Subrs    *localSubrs;
 };
 
@@ -948,7 +948,7 @@ struct cff1_private_dict_opset_t : dict_opset_t
 	env.clear_args ();
 	break;
       case OpCode_Subrs:
-	dictval.subrsOffset = env.argStack.pop_uint ();
+	dictval.subrsOffset = env.argStack.pop_int ();
 	env.clear_args ();
 	break;
 
@@ -990,7 +990,7 @@ struct cff1_private_dict_opset_subset_t : dict_opset_t
 	break;
 
       case OpCode_Subrs:
-	dictval.subrsOffset = env.argStack.pop_uint ();
+	dictval.subrsOffset = env.argStack.pop_int ();
 	env.clear_args ();
 	break;
 
@@ -1090,8 +1090,8 @@ struct cff1
         goto fail;
       hb_barrier ();
 
-      topDictIndex = &StructAtOffset<CFF1TopDictIndex> (nameIndex, nameIndex->get_size ());
-      if ((topDictIndex == &Null (CFF1TopDictIndex)) || !topDictIndex->sanitize (&sc) || (topDictIndex->count == 0))
+      topDictIndex = &StructAtOffsetOrNull<CFF1TopDictIndex> (nameIndex, nameIndex->get_size (), sc);
+      if (topDictIndex == &Null (CFF1TopDictIndex) || (topDictIndex->count == 0))
         goto fail;
       hb_barrier ();
 
@@ -1108,20 +1108,18 @@ struct cff1
 	charset = &Null (Charset);
       else
       {
-	charset = &StructAtOffsetOrNull<Charset> (cff, topDict.CharsetOffset);
-	if (unlikely ((charset == &Null (Charset)) || !charset->sanitize (&sc, &num_charset_entries)))   goto fail;
-	hb_barrier ();
+	charset = &StructAtOffsetOrNull<Charset> (cff, topDict.CharsetOffset, sc, &num_charset_entries);
+	if (unlikely (charset == &Null (Charset)))   goto fail;
       }
 
       fdCount = 1;
       if (is_CID ())
       {
-	fdArray = &StructAtOffsetOrNull<CFF1FDArray> (cff, topDict.FDArrayOffset);
-	fdSelect = &StructAtOffsetOrNull<CFF1FDSelect> (cff, topDict.FDSelectOffset);
-	if (unlikely ((fdArray == &Null (CFF1FDArray)) || !fdArray->sanitize (&sc) ||
-	    (fdSelect == &Null (CFF1FDSelect)) || !fdSelect->sanitize (&sc, fdArray->count)))
+	fdArray = &StructAtOffsetOrNull<CFF1FDArray> (cff, topDict.FDArrayOffset, sc);
+	fdSelect = &StructAtOffsetOrNull<CFF1FDSelect> (cff, topDict.FDSelectOffset, sc, fdArray->count);
+	if (unlikely (fdArray == &Null (CFF1FDArray) ||
+		      fdSelect == &Null (CFF1FDSelect)))
 	  goto fail;
-	hb_barrier ();
 
 	fdCount = fdArray->count;
       }
@@ -1140,27 +1138,19 @@ struct cff1
       {
 	if (!is_predef_encoding ())
 	{
-	  encoding = &StructAtOffsetOrNull<Encoding> (cff, topDict.EncodingOffset);
-	  if (unlikely ((encoding == &Null (Encoding)) || !encoding->sanitize (&sc)))   goto fail;
-	  hb_barrier ();
+	  encoding = &StructAtOffsetOrNull<Encoding> (cff, topDict.EncodingOffset, sc);
+	  if (unlikely (encoding == &Null (Encoding)))   goto fail;
 	}
       }
 
-      stringIndex = &StructAtOffset<CFF1StringIndex> (topDictIndex, topDictIndex->get_size ());
-      if ((stringIndex == &Null (CFF1StringIndex)) || !stringIndex->sanitize (&sc))
+      stringIndex = &StructAtOffsetOrNull<CFF1StringIndex> (topDictIndex, topDictIndex->get_size (), sc);
+      if (stringIndex == &Null (CFF1StringIndex))
         goto fail;
-      hb_barrier ();
 
-      globalSubrs = &StructAtOffset<CFF1Subrs> (stringIndex, stringIndex->get_size ());
-      if ((globalSubrs != &Null (CFF1Subrs)) && !globalSubrs->sanitize (&sc))
+      globalSubrs = &StructAtOffsetOrNull<CFF1Subrs> (stringIndex, stringIndex->get_size (), sc);
+      charStrings = &StructAtOffsetOrNull<CFF1CharStrings> (cff, topDict.charStringsOffset, sc);
+      if (charStrings == &Null (CFF1CharStrings))
         goto fail;
-      hb_barrier ();
-
-      charStrings = &StructAtOffsetOrNull<CFF1CharStrings> (cff, topDict.charStringsOffset);
-
-      if ((charStrings == &Null (CFF1CharStrings)) || unlikely (!charStrings->sanitize (&sc)))
-        goto fail;
-      hb_barrier ();
 
       num_glyphs = charStrings->count;
       if (num_glyphs != sc.get_num_glyphs ())
@@ -1188,19 +1178,13 @@ struct cff1
 	  font->init ();
 	  if (unlikely (!font_interp.interpret (*font)))   goto fail;
 	  PRIVDICTVAL *priv = &privateDicts[i];
-	  const hb_ubytes_t privDictStr = StructAtOffset<UnsizedByteStr> (cff, font->privateDictInfo.offset).as_ubytes (font->privateDictInfo.size);
-	  if (unlikely (!privDictStr.sanitize (&sc)))   goto fail;
-	  hb_barrier ();
+	  const hb_ubytes_t privDictStr = StructAtOffsetOrNull<UnsizedByteStr> (cff, font->privateDictInfo.offset, sc, font->privateDictInfo.size).as_ubytes (font->privateDictInfo.size);
 	  num_interp_env_t env2 (privDictStr);
 	  dict_interpreter_t<PRIVOPSET, PRIVDICTVAL> priv_interp (env2);
 	  priv->init ();
 	  if (unlikely (!priv_interp.interpret (*priv)))   goto fail;
 
-	  priv->localSubrs = &StructAtOffsetOrNull<CFF1Subrs> (&privDictStr, priv->subrsOffset);
-	  if (priv->localSubrs != &Null (CFF1Subrs) &&
-	      unlikely (!priv->localSubrs->sanitize (&sc)))
-	    goto fail;
-	  hb_barrier ();
+	  priv->localSubrs = &StructAtOffsetOrNull<CFF1Subrs> (&privDictStr, priv->subrsOffset, sc);
 	}
       }
       else  /* non-CID */
@@ -1208,18 +1192,13 @@ struct cff1
 	cff1_top_dict_values_t *font = &topDict;
 	PRIVDICTVAL *priv = &privateDicts[0];
 
-	const hb_ubytes_t privDictStr = StructAtOffset<UnsizedByteStr> (cff, font->privateDictInfo.offset).as_ubytes (font->privateDictInfo.size);
-	if (unlikely (!privDictStr.sanitize (&sc)))   goto fail;
-	hb_barrier ();
+	const hb_ubytes_t privDictStr = StructAtOffsetOrNull<UnsizedByteStr> (cff, font->privateDictInfo.offset, sc, font->privateDictInfo.size).as_ubytes (font->privateDictInfo.size);
 	num_interp_env_t env (privDictStr);
 	dict_interpreter_t<PRIVOPSET, PRIVDICTVAL> priv_interp (env);
 	priv->init ();
 	if (unlikely (!priv_interp.interpret (*priv)))   goto fail;
 
-	priv->localSubrs = &StructAtOffsetOrNull<CFF1Subrs> (&privDictStr, priv->subrsOffset);
-	if (priv->localSubrs != &Null (CFF1Subrs) &&
-	    unlikely (!priv->localSubrs->sanitize (&sc)))
-	  goto fail;
+	priv->localSubrs = &StructAtOffsetOrNull<CFF1Subrs> (&privDictStr, priv->subrsOffset, sc);
 	hb_barrier ();
       }
 
@@ -1437,7 +1416,7 @@ struct cff1
       hb_sorted_vector_t<gname_t> *names = glyph_names.get_acquire ();
       if (unlikely (!names))
       {
-	names = (hb_sorted_vector_t<gname_t> *) hb_calloc (sizeof (hb_sorted_vector_t<gname_t>), 1);
+	names = (hb_sorted_vector_t<gname_t> *) hb_calloc (1, sizeof (hb_sorted_vector_t<gname_t>));
 	if (likely (names))
 	{
 	  names->init ();
diff --git a/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh b/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh
index 652748b737..4b3bdc9315 100644
--- a/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-cff2-table.hh
@@ -111,7 +111,7 @@ struct CFF2FDSelect
   DEFINE_SIZE_MIN (2);
 };
 
-struct CFF2VariationStore
+struct CFF2ItemVariationStore
 {
   bool sanitize (hb_sanitize_context_t *c) const
   {
@@ -122,11 +122,11 @@ struct CFF2VariationStore
 		  varStore.sanitize (c));
   }
 
-  bool serialize (hb_serialize_context_t *c, const CFF2VariationStore *varStore)
+  bool serialize (hb_serialize_context_t *c, const CFF2ItemVariationStore *varStore)
   {
     TRACE_SERIALIZE (this);
     unsigned int size_ = varStore->get_size ();
-    CFF2VariationStore *dest = c->allocate_size<CFF2VariationStore> (size_);
+    CFF2ItemVariationStore *dest = c->allocate_size<CFF2ItemVariationStore> (size_);
     if (unlikely (!dest)) return_trace (false);
     hb_memcpy (dest, varStore, size_);
     return_trace (true);
@@ -135,9 +135,9 @@ struct CFF2VariationStore
   unsigned int get_size () const { return HBUINT16::static_size + size; }
 
   HBUINT16	size;
-  VariationStore  varStore;
+  ItemVariationStore  varStore;
 
-  DEFINE_SIZE_MIN (2 + VariationStore::min_size);
+  DEFINE_SIZE_MIN (2 + ItemVariationStore::min_size);
 };
 
 struct cff2_top_dict_values_t : top_dict_values_t<>
@@ -150,8 +150,8 @@ struct cff2_top_dict_values_t : top_dict_values_t<>
   }
   void fini () { top_dict_values_t<>::fini (); }
 
-  unsigned int  vstoreOffset;
-  unsigned int  FDSelectOffset;
+  int  vstoreOffset;
+  int  FDSelectOffset;
 };
 
 struct cff2_top_dict_opset_t : top_dict_opset_t<>
@@ -169,11 +169,11 @@ struct cff2_top_dict_opset_t : top_dict_opset_t<>
 	break;
 
       case OpCode_vstore:
-	dictval.vstoreOffset = env.argStack.pop_uint ();
+	dictval.vstoreOffset = env.argStack.pop_int ();
 	env.clear_args ();
 	break;
       case OpCode_FDSelect:
-	dictval.FDSelectOffset = env.argStack.pop_uint ();
+	dictval.FDSelectOffset = env.argStack.pop_int ();
 	env.clear_args ();
 	break;
 
@@ -241,7 +241,7 @@ struct cff2_private_dict_values_base_t : dict_values_t<VAL>
   }
   void fini () { dict_values_t<VAL>::fini (); }
 
-  unsigned int      subrsOffset;
+  int                subrsOffset;
   const CFF2Subrs   *localSubrs;
   unsigned int      ivs;
 };
@@ -295,7 +295,7 @@ struct cff2_private_dict_opset_t : dict_opset_t
 	env.clear_args ();
 	break;
       case OpCode_Subrs:
-	dictval.subrsOffset = env.argStack.pop_uint ();
+	dictval.subrsOffset = env.argStack.pop_int ();
 	env.clear_args ();
 	break;
       case OpCode_vsindexdict:
@@ -344,7 +344,7 @@ struct cff2_private_dict_opset_subset_t : dict_opset_t
 	return;
 
       case OpCode_Subrs:
-	dictval.subrsOffset = env.argStack.pop_uint ();
+	dictval.subrsOffset = env.argStack.pop_int ();
 	env.clear_args ();
 	break;
 
@@ -426,18 +426,15 @@ struct cff2
 	if (unlikely (!top_interp.interpret (topDict))) goto fail;
       }
 
-      globalSubrs = &StructAtOffset<CFF2Subrs> (cff2, cff2->topDict + cff2->topDictSize);
-      varStore = &StructAtOffsetOrNull<CFF2VariationStore> (cff2, topDict.vstoreOffset);
-      charStrings = &StructAtOffsetOrNull<CFF2CharStrings> (cff2, topDict.charStringsOffset);
-      fdArray = &StructAtOffsetOrNull<CFF2FDArray> (cff2, topDict.FDArrayOffset);
-      fdSelect = &StructAtOffsetOrNull<CFF2FDSelect> (cff2, topDict.FDSelectOffset);
-
-      if (((varStore != &Null (CFF2VariationStore)) && unlikely (!varStore->sanitize (&sc))) ||
-	  (charStrings == &Null (CFF2CharStrings)) || unlikely (!charStrings->sanitize (&sc)) ||
-	  (globalSubrs == &Null (CFF2Subrs)) || unlikely (!globalSubrs->sanitize (&sc)) ||
-	  (fdArray == &Null (CFF2FDArray)) || unlikely (!fdArray->sanitize (&sc)) ||
-	  !hb_barrier () ||
-	  (((fdSelect != &Null (CFF2FDSelect)) && unlikely (!fdSelect->sanitize (&sc, fdArray->count)))))
+      globalSubrs = &StructAtOffsetOrNull<CFF2Subrs> (cff2, cff2->topDict + cff2->topDictSize, sc);
+      varStore = &StructAtOffsetOrNull<CFF2ItemVariationStore> (cff2, topDict.vstoreOffset, sc);
+      charStrings = &StructAtOffsetOrNull<CFF2CharStrings> (cff2, topDict.charStringsOffset, sc);
+      fdArray = &StructAtOffsetOrNull<CFF2FDArray> (cff2, topDict.FDArrayOffset, sc);
+      fdSelect = &StructAtOffsetOrNull<CFF2FDSelect> (cff2, topDict.FDSelectOffset, sc, fdArray->count);
+
+      if (charStrings == &Null (CFF2CharStrings) ||
+	  globalSubrs == &Null (CFF2Subrs) ||
+	  fdArray == &Null (CFF2FDArray))
         goto fail;
 
       num_glyphs = charStrings->count;
@@ -462,19 +459,13 @@ struct cff2
 	font->init ();
 	if (unlikely (!font_interp.interpret (*font))) goto fail;
 
-	const hb_ubytes_t privDictStr = StructAtOffsetOrNull<UnsizedByteStr> (cff2, font->privateDictInfo.offset).as_ubytes (font->privateDictInfo.size);
-	if (unlikely (!privDictStr.sanitize (&sc))) goto fail;
-	hb_barrier ();
+	const hb_ubytes_t privDictStr = StructAtOffsetOrNull<UnsizedByteStr> (cff2, font->privateDictInfo.offset, sc, font->privateDictInfo.size).as_ubytes (font->privateDictInfo.size);
 	cff2_priv_dict_interp_env_t env2 (privDictStr);
 	dict_interpreter_t<PRIVOPSET, PRIVDICTVAL, cff2_priv_dict_interp_env_t> priv_interp (env2);
 	privateDicts[i].init ();
 	if (unlikely (!priv_interp.interpret (privateDicts[i]))) goto fail;
 
-	privateDicts[i].localSubrs = &StructAtOffsetOrNull<CFF2Subrs> (&privDictStr[0], privateDicts[i].subrsOffset);
-	if (privateDicts[i].localSubrs != &Null (CFF2Subrs) &&
-	  unlikely (!privateDicts[i].localSubrs->sanitize (&sc)))
-	  goto fail;
-	hb_barrier ();
+	privateDicts[i].localSubrs = &StructAtOffsetOrNull<CFF2Subrs> (&privDictStr[0], privateDicts[i].subrsOffset, sc);
       }
 
       return;
@@ -509,7 +500,7 @@ struct cff2
     hb_blob_t			*blob = nullptr;
     cff2_top_dict_values_t	topDict;
     const CFF2Subrs		*globalSubrs = nullptr;
-    const CFF2VariationStore	*varStore = nullptr;
+    const CFF2ItemVariationStore	*varStore = nullptr;
     const CFF2CharStrings	*charStrings = nullptr;
     const CFF2FDArray		*fdArray = nullptr;
     const CFF2FDSelect		*fdSelect = nullptr;
diff --git a/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh b/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh
index e2e2581855..64d2b13880 100644
--- a/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-cmap-table.hh
@@ -41,6 +41,30 @@
 
 namespace OT {
 
+static inline uint8_t unicode_to_macroman (hb_codepoint_t u)
+{
+  uint16_t mapping[] = {
+    0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
+    0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
+    0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
+    0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
+    0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
+    0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
+    0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
+    0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
+    0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
+    0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
+    0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
+    0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
+    0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
+    0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
+    0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
+    0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7
+  };
+  uint16_t *c = hb_bsearch (u, mapping, ARRAY_LENGTH (mapping), sizeof (mapping[0]),
+			    _hb_cmp_operator<uint16_t, uint16_t>);
+  return c ? (c - mapping) + 0x7F : 0;
+}
 
 struct CmapSubtableFormat0
 {
@@ -1465,8 +1489,11 @@ struct EncodingRecord
     int ret;
     ret = platformID.cmp (other.platformID);
     if (ret) return ret;
-    ret = encodingID.cmp (other.encodingID);
-    if (ret) return ret;
+    if (other.encodingID != 0xFFFF)
+    {
+      ret = encodingID.cmp (other.encodingID);
+      if (ret) return ret;
+    }
     return 0;
   }
 
@@ -1814,9 +1841,13 @@ struct cmap
                                          c->plan));
   }
 
-  const CmapSubtable *find_best_subtable (bool *symbol = nullptr) const
+  const CmapSubtable *find_best_subtable (bool *symbol = nullptr,
+					  bool *mac = nullptr,
+					  bool *macroman = nullptr) const
   {
     if (symbol) *symbol = false;
+    if (mac) *mac = false;
+    if (macroman) *macroman = false;
 
     const CmapSubtable *subtable;
 
@@ -1841,6 +1872,20 @@ struct cmap
     if ((subtable = this->find_subtable (0, 1))) return subtable;
     if ((subtable = this->find_subtable (0, 0))) return subtable;
 
+    /* MacRoman subtable. */
+    if ((subtable = this->find_subtable (1, 0)))
+    {
+      if (mac) *mac = true;
+      if (macroman) *macroman = true;
+      return subtable;
+    }
+    /* Any other Mac subtable; we just map ASCII for these. */
+    if ((subtable = this->find_subtable (1, 0xFFFF)))
+    {
+      if (mac) *mac = true;
+      return subtable;
+    }
+
     /* Meh. */
     return &Null (CmapSubtable);
   }
@@ -1852,8 +1897,8 @@ struct cmap
     accelerator_t (hb_face_t *face)
     {
       this->table = hb_sanitize_context_t ().reference_table<cmap> (face);
-      bool symbol;
-      this->subtable = table->find_best_subtable (&symbol);
+      bool symbol, mac, macroman;
+      this->subtable = table->find_best_subtable (&symbol, &mac, &macroman);
       this->subtable_uvs = &Null (CmapSubtableFormat14);
       {
 	const CmapSubtable *st = table->find_subtable (0, 5);
@@ -1862,6 +1907,7 @@ struct cmap
       }
 
       this->get_glyph_data = subtable;
+#ifndef HB_NO_CMAP_LEGACY_SUBTABLES
       if (unlikely (symbol))
       {
 	switch ((unsigned) face->table.OS2->get_font_page ()) {
@@ -1881,7 +1927,16 @@ struct cmap
 	  break;
 	}
       }
+      else if (unlikely (macroman))
+      {
+	this->get_glyph_funcZ = get_glyph_from_macroman<CmapSubtable>;
+      }
+      else if (unlikely (mac))
+      {
+	this->get_glyph_funcZ = get_glyph_from_ascii<CmapSubtable>;
+      }
       else
+#endif
       {
 	switch (subtable->u.format) {
 	/* Accelerate format 4 and format 12. */
@@ -1924,7 +1979,7 @@ struct cmap
 			    hb_codepoint_t *glyph,
 			    cache_t *cache = nullptr) const
     {
-      if (unlikely (!this->get_glyph_funcZ)) return 0;
+      if (unlikely (!this->get_glyph_funcZ)) return false;
       return _cached_get (unicode, glyph, cache);
     }
 
@@ -2006,6 +2061,28 @@ struct cmap
       return false;
     }
 
+    template <typename Type>
+    HB_INTERNAL static bool get_glyph_from_ascii (const void *obj,
+						  hb_codepoint_t codepoint,
+						  hb_codepoint_t *glyph)
+    {
+      const Type *typed_obj = (const Type *) obj;
+      return codepoint < 0x80 && typed_obj->get_glyph (codepoint, glyph);
+    }
+
+    template <typename Type>
+    HB_INTERNAL static bool get_glyph_from_macroman (const void *obj,
+						     hb_codepoint_t codepoint,
+						     hb_codepoint_t *glyph)
+    {
+      if (get_glyph_from_ascii<Type> (obj, codepoint, glyph))
+	return true;
+
+      const Type *typed_obj = (const Type *) obj;
+      unsigned c = unicode_to_macroman (codepoint);
+      return c && typed_obj->get_glyph (c, glyph);
+    }
+
     private:
     hb_nonnull_ptr_t<const CmapSubtable> subtable;
     hb_nonnull_ptr_t<const CmapSubtableFormat14> subtable_uvs;
@@ -2035,28 +2112,6 @@ struct cmap
     return &(this+result.subtable);
   }
 
-  const EncodingRecord *find_encodingrec (unsigned int platform_id,
-					  unsigned int encoding_id) const
-  {
-    EncodingRecord key;
-    key.platformID = platform_id;
-    key.encodingID = encoding_id;
-
-    return encodingRecord.as_array ().bsearch (key);
-  }
-
-  bool find_subtable (unsigned format) const
-  {
-    auto it =
-    + hb_iter (encodingRecord)
-    | hb_map (&EncodingRecord::subtable)
-    | hb_map (hb_add (this))
-    | hb_filter ([&] (const CmapSubtable& _) { return _.u.format == format; })
-    ;
-
-    return it.len ();
-  }
-
   public:
 
   bool sanitize (hb_sanitize_context_t *c) const
diff --git a/thirdparty/harfbuzz/src/hb-ot-font.cc b/thirdparty/harfbuzz/src/hb-ot-font.cc
index b3677c6a4c..1da869d697 100644
--- a/thirdparty/harfbuzz/src/hb-ot-font.cc
+++ b/thirdparty/harfbuzz/src/hb-ot-font.cc
@@ -208,12 +208,12 @@ hb_ot_get_glyph_h_advances (hb_font_t* font, void* font_data,
 
 #if !defined(HB_NO_VAR) && !defined(HB_NO_OT_FONT_ADVANCE_CACHE)
   const OT::HVAR &HVAR = *hmtx.var_table;
-  const OT::VariationStore &varStore = &HVAR + HVAR.varStore;
-  OT::VariationStore::cache_t *varStore_cache = font->num_coords * count >= 128 ? varStore.create_cache () : nullptr;
+  const OT::ItemVariationStore &varStore = &HVAR + HVAR.varStore;
+  OT::ItemVariationStore::cache_t *varStore_cache = font->num_coords * count >= 128 ? varStore.create_cache () : nullptr;
 
   bool use_cache = font->num_coords;
 #else
-  OT::VariationStore::cache_t *varStore_cache = nullptr;
+  OT::ItemVariationStore::cache_t *varStore_cache = nullptr;
   bool use_cache = false;
 #endif
 
@@ -277,7 +277,7 @@ hb_ot_get_glyph_h_advances (hb_font_t* font, void* font_data,
   }
 
 #if !defined(HB_NO_VAR) && !defined(HB_NO_OT_FONT_ADVANCE_CACHE)
-  OT::VariationStore::destroy_cache (varStore_cache);
+  OT::ItemVariationStore::destroy_cache (varStore_cache);
 #endif
 
   if (font->x_strength && !font->embolden_in_place)
@@ -313,10 +313,10 @@ hb_ot_get_glyph_v_advances (hb_font_t* font, void* font_data,
   {
 #if !defined(HB_NO_VAR) && !defined(HB_NO_OT_FONT_ADVANCE_CACHE)
     const OT::VVAR &VVAR = *vmtx.var_table;
-    const OT::VariationStore &varStore = &VVAR + VVAR.varStore;
-    OT::VariationStore::cache_t *varStore_cache = font->num_coords ? varStore.create_cache () : nullptr;
+    const OT::ItemVariationStore &varStore = &VVAR + VVAR.varStore;
+    OT::ItemVariationStore::cache_t *varStore_cache = font->num_coords ? varStore.create_cache () : nullptr;
 #else
-    OT::VariationStore::cache_t *varStore_cache = nullptr;
+    OT::ItemVariationStore::cache_t *varStore_cache = nullptr;
 #endif
 
     for (unsigned int i = 0; i < count; i++)
@@ -327,7 +327,7 @@ hb_ot_get_glyph_v_advances (hb_font_t* font, void* font_data,
     }
 
 #if !defined(HB_NO_VAR) && !defined(HB_NO_OT_FONT_ADVANCE_CACHE)
-    OT::VariationStore::destroy_cache (varStore_cache);
+    OT::ItemVariationStore::destroy_cache (varStore_cache);
 #endif
   }
   else
diff --git a/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh b/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh
index 89640b43f1..48bd536121 100644
--- a/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-hmtx-table.hh
@@ -145,6 +145,29 @@ struct hmtxvmtx
         table->minTrailingBearing = min_rsb;
         table->maxExtent = max_extent;
       }
+
+      if (T::is_horizontal)
+      {
+        const auto &OS2 = *c->plan->source->table.OS2;
+        if (OS2.has_data () &&
+            table->ascender == OS2.sTypoAscender &&
+            table->descender == OS2.sTypoDescender &&
+            table->lineGap == OS2.sTypoLineGap)
+        {
+          table->ascender = static_cast<int> (roundf (OS2.sTypoAscender +
+                                                      MVAR.get_var (HB_OT_METRICS_TAG_HORIZONTAL_ASCENDER,
+                                                                    c->plan->normalized_coords.arrayZ,
+                                                                    c->plan->normalized_coords.length)));
+          table->descender = static_cast<int> (roundf (OS2.sTypoDescender +
+                                                       MVAR.get_var (HB_OT_METRICS_TAG_HORIZONTAL_DESCENDER,
+                                                                     c->plan->normalized_coords.arrayZ,
+                                                                     c->plan->normalized_coords.length)));
+          table->lineGap = static_cast<int> (roundf (OS2.sTypoLineGap +
+                                                     MVAR.get_var (HB_OT_METRICS_TAG_HORIZONTAL_LINE_GAP,
+                                                                   c->plan->normalized_coords.arrayZ,
+                                                                   c->plan->normalized_coords.length)));
+        }
+      }
     }
 #endif
 
@@ -374,7 +397,7 @@ struct hmtxvmtx
 
     unsigned get_advance_with_var_unscaled (hb_codepoint_t  glyph,
 					    hb_font_t      *font,
-					    VariationStore::cache_t *store_cache = nullptr) const
+					    ItemVariationStore::cache_t *store_cache = nullptr) const
     {
       unsigned int advance = get_advance_without_var_unscaled (glyph);
 
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-base-table.hh b/thirdparty/harfbuzz/src/hb-ot-layout-base-table.hh
index a23b6377d1..0278399069 100644
--- a/thirdparty/harfbuzz/src/hb-ot-layout-base-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-layout-base-table.hh
@@ -46,6 +46,12 @@ struct BaseCoordFormat1
     return HB_DIRECTION_IS_HORIZONTAL (direction) ? font->em_scale_y (coordinate) : font->em_scale_x (coordinate);
   }
 
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    return_trace ((bool) c->serializer->embed (*this));
+  }
+
   bool sanitize (hb_sanitize_context_t *c) const
   {
     TRACE_SANITIZE (this);
@@ -67,6 +73,17 @@ struct BaseCoordFormat2
     return HB_DIRECTION_IS_HORIZONTAL (direction) ? font->em_scale_y (coordinate) : font->em_scale_x (coordinate);
   }
 
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->embed (*this);
+    if (unlikely (!out)) return_trace (false);
+
+    return_trace (c->serializer->check_assign (out->referenceGlyph,
+                                               c->plan->glyph_map->get (referenceGlyph),
+                                               HB_SERIALIZE_ERROR_INT_OVERFLOW));
+  }
+
   bool sanitize (hb_sanitize_context_t *c) const
   {
     TRACE_SANITIZE (this);
@@ -86,7 +103,7 @@ struct BaseCoordFormat2
 struct BaseCoordFormat3
 {
   hb_position_t get_coord (hb_font_t *font,
-			   const VariationStore &var_store,
+			   const ItemVariationStore &var_store,
 			   hb_direction_t direction) const
   {
     const Device &device = this+deviceTable;
@@ -96,6 +113,23 @@ struct BaseCoordFormat3
 	 : font->em_scale_x (coordinate) + device.get_x_delta (font, var_store);
   }
 
+  void collect_variation_indices (hb_set_t& varidx_set /* OUT */) const
+  {
+    unsigned varidx = (this+deviceTable).get_variation_index ();
+    varidx_set.add (varidx);
+  }
+
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->embed (*this);
+    if (unlikely (!out)) return_trace (false);
+
+    return_trace (out->deviceTable.serialize_copy (c->serializer, deviceTable,
+                                                   this, 0,
+                                                   hb_serialize_context_t::Head,
+                                                   &c->plan->base_variation_idx_map));
+  }
 
   bool sanitize (hb_sanitize_context_t *c) const
   {
@@ -120,7 +154,7 @@ struct BaseCoord
   bool has_data () const { return u.format; }
 
   hb_position_t get_coord (hb_font_t            *font,
-			   const VariationStore &var_store,
+			   const ItemVariationStore &var_store,
 			   hb_direction_t        direction) const
   {
     switch (u.format) {
@@ -131,6 +165,27 @@ struct BaseCoord
     }
   }
 
+  void collect_variation_indices (hb_set_t& varidx_set /* OUT */) const
+  {
+    switch (u.format) {
+    case 3: u.format3.collect_variation_indices (varidx_set);
+    default:return;
+    }
+  }
+
+  template <typename context_t, typename ...Ts>
+  typename context_t::return_t dispatch (context_t *c, Ts&&... ds) const
+  {
+    if (unlikely (!c->may_dispatch (this, &u.format))) return c->no_dispatch_return_value ();
+    TRACE_DISPATCH (this, u.format);
+    switch (u.format) {
+    case 1: return_trace (c->dispatch (u.format1, std::forward<Ts> (ds)...));
+    case 2: return_trace (c->dispatch (u.format2, std::forward<Ts> (ds)...));
+    case 3: return_trace (c->dispatch (u.format3, std::forward<Ts> (ds)...));
+    default:return_trace (c->default_return_value ());
+    }
+  }
+
   bool sanitize (hb_sanitize_context_t *c) const
   {
     TRACE_SANITIZE (this);
@@ -161,12 +216,37 @@ struct FeatMinMaxRecord
 
   bool has_data () const { return tag; }
 
+  hb_tag_t get_feature_tag () const { return tag; }
+
   void get_min_max (const BaseCoord **min, const BaseCoord **max) const
   {
     if (likely (min)) *min = &(this+minCoord);
     if (likely (max)) *max = &(this+maxCoord);
   }
 
+  void collect_variation_indices (const hb_subset_plan_t* plan,
+                                  const void *base,
+                                  hb_set_t& varidx_set /* OUT */) const
+  {
+    if (!plan->layout_features.has (tag))
+      return;
+
+    (base+minCoord).collect_variation_indices (varidx_set);
+    (base+maxCoord).collect_variation_indices (varidx_set);
+  }
+
+  bool subset (hb_subset_context_t *c,
+               const void *base) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->embed (*this);
+    if (unlikely (!out)) return_trace (false);
+    if (!(out->minCoord.serialize_subset (c, minCoord, base)))
+      return_trace (false);
+
+    return_trace (out->maxCoord.serialize_subset (c, maxCoord, base));
+  }
+
   bool sanitize (hb_sanitize_context_t *c, const void *base) const
   {
     TRACE_SANITIZE (this);
@@ -206,6 +286,39 @@ struct MinMax
     }
   }
 
+  void collect_variation_indices (const hb_subset_plan_t* plan,
+                                  hb_set_t& varidx_set /* OUT */) const
+  {
+    (this+minCoord).collect_variation_indices (varidx_set);
+    (this+maxCoord).collect_variation_indices (varidx_set);
+    for (const FeatMinMaxRecord& record : featMinMaxRecords)
+      record.collect_variation_indices (plan, this, varidx_set);
+  }
+
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->start_embed (*this);
+    if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false);
+
+    if (!(out->minCoord.serialize_subset (c, minCoord, this)) ||
+        !(out->maxCoord.serialize_subset (c, maxCoord, this)))
+      return_trace (false);
+
+    unsigned len = 0;
+    for (const FeatMinMaxRecord& _ : featMinMaxRecords)
+    {
+      hb_tag_t feature_tag = _.get_feature_tag ();
+      if (!c->plan->layout_features.has (feature_tag))
+        continue;
+
+      if (!_.subset (c, this)) return false;
+      len++;
+    }
+    return_trace (c->serializer->check_assign (out->featMinMaxRecords.len, len,
+                                               HB_SERIALIZE_ERROR_INT_OVERFLOW));
+  }
+
   bool sanitize (hb_sanitize_context_t *c) const
   {
     TRACE_SANITIZE (this);
@@ -240,6 +353,26 @@ struct BaseValues
     return this+baseCoords[baseline_tag_index];
   }
 
+  void collect_variation_indices (hb_set_t& varidx_set /* OUT */) const
+  {
+    for (const auto& _ : baseCoords)
+      (this+_).collect_variation_indices (varidx_set);
+  }
+
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->start_embed (*this);
+    if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false);
+    out->defaultIndex = defaultIndex;
+
+    for (const auto& _ : baseCoords)
+      if (!subset_offset_array (c, out->baseCoords, this) (_))
+        return_trace (false);
+
+    return_trace (bool (out->baseCoords));
+  }
+
   bool sanitize (hb_sanitize_context_t *c) const
   {
     TRACE_SANITIZE (this);
@@ -270,6 +403,20 @@ struct BaseLangSysRecord
 
   const MinMax &get_min_max () const { return this+minMax; }
 
+  void collect_variation_indices (const hb_subset_plan_t* plan,
+                                  hb_set_t& varidx_set /* OUT */) const
+  { (this+minMax).collect_variation_indices (plan, varidx_set); }
+
+  bool subset (hb_subset_context_t *c,
+               const void *base) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->embed (*this);
+    if (unlikely (!out)) return_trace (false);
+
+    return_trace (out->minMax.serialize_subset (c, minMax, base));
+  }
+
   bool sanitize (hb_sanitize_context_t *c, const void *base) const
   {
     TRACE_SANITIZE (this);
@@ -300,6 +447,35 @@ struct BaseScript
   bool has_values () const { return baseValues; }
   bool has_min_max () const { return defaultMinMax; /* TODO What if only per-language is present? */ }
 
+  void collect_variation_indices (const hb_subset_plan_t* plan,
+                                  hb_set_t& varidx_set /* OUT */) const
+  {
+    (this+baseValues).collect_variation_indices (varidx_set);
+    (this+defaultMinMax).collect_variation_indices (plan, varidx_set);
+    
+    for (const BaseLangSysRecord& _ : baseLangSysRecords)
+      _.collect_variation_indices (plan, varidx_set);
+  }
+
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->start_embed (*this);
+    if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false);
+
+    if (baseValues && !out->baseValues.serialize_subset (c, baseValues, this))
+      return_trace (false);
+
+    if (defaultMinMax && !out->defaultMinMax.serialize_subset (c, defaultMinMax, this))
+      return_trace (false);
+
+    for (const auto& _ : baseLangSysRecords)
+      if (!_.subset (c, this)) return_trace (false);
+
+    return_trace (c->serializer->check_assign (out->baseLangSysRecords.len, baseLangSysRecords.len,
+                                               HB_SERIALIZE_ERROR_INT_OVERFLOW));
+  }
+
   bool sanitize (hb_sanitize_context_t *c) const
   {
     TRACE_SANITIZE (this);
@@ -332,9 +508,31 @@ struct BaseScriptRecord
 
   bool has_data () const { return baseScriptTag; }
 
+  hb_tag_t get_script_tag () const { return baseScriptTag; }
+
   const BaseScript &get_base_script (const BaseScriptList *list) const
   { return list+baseScript; }
 
+  void collect_variation_indices (const hb_subset_plan_t* plan,
+                                  const void* list,
+                                  hb_set_t& varidx_set /* OUT */) const
+  {
+    if (!plan->layout_scripts.has (baseScriptTag))
+      return;
+
+    (list+baseScript).collect_variation_indices (plan, varidx_set);
+  }
+
+  bool subset (hb_subset_context_t *c,
+               const void *base) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->embed (*this);
+    if (unlikely (!out)) return_trace (false);
+
+    return_trace (out->baseScript.serialize_subset (c, baseScript, base));
+  }
+
   bool sanitize (hb_sanitize_context_t *c, const void *base) const
   {
     TRACE_SANITIZE (this);
@@ -361,6 +559,33 @@ struct BaseScriptList
     return record->has_data () ? record->get_base_script (this) : Null (BaseScript);
   }
 
+  void collect_variation_indices (const hb_subset_plan_t* plan,
+                                  hb_set_t& varidx_set /* OUT */) const
+  {
+    for (const BaseScriptRecord& _ : baseScriptRecords)
+      _.collect_variation_indices (plan, this, varidx_set);
+  }
+
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->start_embed (*this);
+    if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false);
+
+    unsigned len = 0;
+    for (const BaseScriptRecord& _ : baseScriptRecords)
+    {
+      hb_tag_t script_tag = _.get_script_tag ();
+      if (!c->plan->layout_scripts.has (script_tag))
+        continue;
+
+      if (!_.subset (c, this)) return false;
+      len++;
+    }
+    return_trace (c->serializer->check_assign (out->baseScriptRecords.len, len,
+                                               HB_SERIALIZE_ERROR_INT_OVERFLOW));
+  }
+
   bool sanitize (hb_sanitize_context_t *c) const
   {
     TRACE_SANITIZE (this);
@@ -422,6 +647,20 @@ struct Axis
     return true;
   }
 
+  void collect_variation_indices (const hb_subset_plan_t* plan,
+                                  hb_set_t& varidx_set /* OUT */) const
+  { (this+baseScriptList).collect_variation_indices (plan, varidx_set); }
+
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->embed (*this);
+    if (unlikely (!out)) return_trace (false);
+
+    out->baseTagList.serialize_copy (c->serializer, baseTagList, this);
+    return_trace (out->baseScriptList.serialize_subset (c, baseScriptList, this));
+  }
+
   bool sanitize (hb_sanitize_context_t *c) const
   {
     TRACE_SANITIZE (this);
@@ -453,8 +692,41 @@ struct BASE
   const Axis &get_axis (hb_direction_t direction) const
   { return HB_DIRECTION_IS_VERTICAL (direction) ? this+vAxis : this+hAxis; }
 
-  const VariationStore &get_var_store () const
-  { return version.to_int () < 0x00010001u ? Null (VariationStore) : this+varStore; }
+  bool has_var_store () const
+  { return version.to_int () >= 0x00010001u && varStore != 0; }
+
+  const ItemVariationStore &get_var_store () const
+  { return version.to_int () < 0x00010001u ? Null (ItemVariationStore) : this+varStore; }
+
+  void collect_variation_indices (const hb_subset_plan_t* plan,
+                                  hb_set_t& varidx_set /* OUT */) const
+  {
+    (this+hAxis).collect_variation_indices (plan, varidx_set);
+    (this+vAxis).collect_variation_indices (plan, varidx_set);
+  }
+
+  bool subset (hb_subset_context_t *c) const
+  {
+    TRACE_SUBSET (this);
+    auto *out = c->serializer->start_embed (*this);
+    if (unlikely (!out || !c->serializer->extend_min (out))) return_trace (false);
+
+    out->version = version;
+    if (hAxis && !out->hAxis.serialize_subset (c, hAxis, this))
+      return_trace (false);
+
+    if (vAxis && !out->vAxis.serialize_subset (c, vAxis, this))
+      return_trace (false);
+
+    if (has_var_store ())
+    {
+      if (!c->serializer->allocate_size<Offset32To<ItemVariationStore>> (Offset32To<ItemVariationStore>::static_size))
+        return_trace (false);
+      return_trace (out->varStore.serialize_subset (c, varStore, this, c->plan->base_varstore_inner_maps.as_array ()));
+    }
+
+    return_trace (true);
+  }
 
   bool get_baseline (hb_font_t      *font,
 		     hb_tag_t        baseline_tag,
@@ -487,7 +759,7 @@ struct BASE
 					   &min_coord, &max_coord))
       return false;
 
-    const VariationStore &var_store = get_var_store ();
+    const ItemVariationStore &var_store = get_var_store ();
     if (likely (min && min_coord)) *min = min_coord->get_coord (font, var_store, direction);
     if (likely (max && max_coord)) *max = max_coord->get_coord (font, var_store, direction);
     return true;
@@ -510,7 +782,7 @@ struct BASE
 				 * of BASE table (may be NULL) */
   Offset16To<Axis>vAxis;		/* Offset to vertical Axis table, from beginning
 				 * of BASE table (may be NULL) */
-  Offset32To<VariationStore>
+  Offset32To<ItemVariationStore>
 		varStore;	/* Offset to the table of Item Variation
 				 * Store--from beginning of BASE
 				 * header (may be NULL).  Introduced
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-common.hh b/thirdparty/harfbuzz/src/hb-ot-layout-common.hh
index 6b359cceb7..aba427368c 100644
--- a/thirdparty/harfbuzz/src/hb-ot-layout-common.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-layout-common.hh
@@ -188,7 +188,7 @@ struct hb_subset_layout_context_t :
   unsigned lookup_index_count;
 };
 
-struct VariationStore;
+struct ItemVariationStore;
 struct hb_collect_variation_indices_context_t :
        hb_dispatch_context_t<hb_collect_variation_indices_context_t>
 {
@@ -3036,7 +3036,7 @@ struct VarData
   DEFINE_SIZE_ARRAY (6, regionIndices);
 };
 
-struct VariationStore
+struct ItemVariationStore
 {
   friend struct item_variations_t;
   using cache_t = VarRegionList::cache_t;
@@ -3141,7 +3141,7 @@ struct VariationStore
   }
 
   bool serialize (hb_serialize_context_t *c,
-		  const VariationStore *src,
+		  const ItemVariationStore *src,
 		  const hb_array_t <const hb_inc_bimap_t> &inner_maps)
   {
     TRACE_SERIALIZE (this);
@@ -3197,7 +3197,7 @@ struct VariationStore
     return_trace (true);
   }
 
-  VariationStore *copy (hb_serialize_context_t *c) const
+  ItemVariationStore *copy (hb_serialize_context_t *c) const
   {
     TRACE_SERIALIZE (this);
     auto *out = c->start_embed (this);
@@ -3227,7 +3227,7 @@ struct VariationStore
     return_trace (false);
 #endif
 
-    VariationStore *varstore_prime = c->serializer->start_embed<VariationStore> ();
+    ItemVariationStore *varstore_prime = c->serializer->start_embed<ItemVariationStore> ();
     if (unlikely (!varstore_prime)) return_trace (false);
 
     varstore_prime->serialize (c->serializer, this, inner_maps);
@@ -4030,13 +4030,13 @@ struct VariationDevice
   private:
 
   hb_position_t get_x_delta (hb_font_t *font,
-			     const VariationStore &store,
-			     VariationStore::cache_t *store_cache = nullptr) const
+			     const ItemVariationStore &store,
+			     ItemVariationStore::cache_t *store_cache = nullptr) const
   { return font->em_scalef_x (get_delta (font, store, store_cache)); }
 
   hb_position_t get_y_delta (hb_font_t *font,
-			     const VariationStore &store,
-			     VariationStore::cache_t *store_cache = nullptr) const
+			     const ItemVariationStore &store,
+			     ItemVariationStore::cache_t *store_cache = nullptr) const
   { return font->em_scalef_y (get_delta (font, store, store_cache)); }
 
   VariationDevice* copy (hb_serialize_context_t *c,
@@ -4070,10 +4070,10 @@ struct VariationDevice
   private:
 
   float get_delta (hb_font_t *font,
-		   const VariationStore &store,
-		   VariationStore::cache_t *store_cache = nullptr) const
+		   const ItemVariationStore &store,
+		   ItemVariationStore::cache_t *store_cache = nullptr) const
   {
-    return store.get_delta (varIdx, font->coords, font->num_coords, (VariationStore::cache_t *) store_cache);
+    return store.get_delta (varIdx, font->coords, font->num_coords, (ItemVariationStore::cache_t *) store_cache);
   }
 
   protected:
@@ -4097,8 +4097,8 @@ struct DeviceHeader
 struct Device
 {
   hb_position_t get_x_delta (hb_font_t *font,
-			     const VariationStore &store=Null (VariationStore),
-			     VariationStore::cache_t *store_cache = nullptr) const
+			     const ItemVariationStore &store=Null (ItemVariationStore),
+			     ItemVariationStore::cache_t *store_cache = nullptr) const
   {
     switch (u.b.format)
     {
@@ -4115,8 +4115,8 @@ struct Device
     }
   }
   hb_position_t get_y_delta (hb_font_t *font,
-			     const VariationStore &store=Null (VariationStore),
-			     VariationStore::cache_t *store_cache = nullptr) const
+			     const ItemVariationStore &store=Null (ItemVariationStore),
+			     ItemVariationStore::cache_t *store_cache = nullptr) const
   {
     switch (u.b.format)
     {
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh b/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh
index 499ad673e4..c65ea32b8a 100644
--- a/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-layout-gsubgpos.hh
@@ -708,8 +708,8 @@ struct hb_ot_apply_context_t :
   recurse_func_t recurse_func = nullptr;
   const GDEF &gdef;
   const GDEF::accelerator_t &gdef_accel;
-  const VariationStore &var_store;
-  VariationStore::cache_t *var_store_cache;
+  const ItemVariationStore &var_store;
+  ItemVariationStore::cache_t *var_store_cache;
   hb_set_digest_t digest;
 
   hb_direction_t direction;
@@ -723,7 +723,6 @@ struct hb_ot_apply_context_t :
   bool auto_zwj = true;
   bool per_syllable = false;
   bool random = false;
-  uint32_t random_state = 1;
   unsigned new_syllables = (unsigned) -1;
 
   signed last_base = -1; // GPOS uses
@@ -766,7 +765,7 @@ struct hb_ot_apply_context_t :
   ~hb_ot_apply_context_t ()
   {
 #ifndef HB_NO_VAR
-    VariationStore::destroy_cache (var_store_cache);
+    ItemVariationStore::destroy_cache (var_store_cache);
 #endif
   }
 
@@ -788,8 +787,8 @@ struct hb_ot_apply_context_t :
   uint32_t random_number ()
   {
     /* http://www.cplusplus.com/reference/random/minstd_rand/ */
-    random_state = random_state * 48271 % 2147483647;
-    return random_state;
+    buffer->random_state = buffer->random_state * 48271 % 2147483647;
+    return buffer->random_state;
   }
 
   bool match_properties_mark (hb_codepoint_t  glyph,
diff --git a/thirdparty/harfbuzz/src/hb-ot-layout.cc b/thirdparty/harfbuzz/src/hb-ot-layout.cc
index 2eb8535db5..a4c13abadf 100644
--- a/thirdparty/harfbuzz/src/hb-ot-layout.cc
+++ b/thirdparty/harfbuzz/src/hb-ot-layout.cc
@@ -2127,7 +2127,7 @@ hb_ot_layout_get_font_extents (hb_font_t         *font,
 			       hb_tag_t           language_tag,
 			       hb_font_extents_t *extents)
 {
-  hb_position_t min, max;
+  hb_position_t min = 0, max = 0;
   if (font->face->table.BASE->get_min_max (font, direction, script_tag, language_tag, HB_TAG_NONE,
 					   &min, &max))
   {
diff --git a/thirdparty/harfbuzz/src/hb-ot-math-table.hh b/thirdparty/harfbuzz/src/hb-ot-math-table.hh
index 32e497aef6..5839059fde 100644
--- a/thirdparty/harfbuzz/src/hb-ot-math-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-math-table.hh
@@ -344,27 +344,20 @@ struct MathKern
     const MathValueRecord* kernValue = mathValueRecordsZ.arrayZ + heightCount;
     int sign = font->y_scale < 0 ? -1 : +1;
 
-    /* The description of the MathKern table is a ambiguous, but interpreting
-     * "between the two heights found at those indexes" for 0 < i < len as
-     *
-     *   correctionHeight[i-1] < correction_height <= correctionHeight[i]
-     *
-     * makes the result consistent with the limit cases and we can just use the
-     * binary search algorithm of std::upper_bound:
+    /* According to OpenType spec (v1.9), except for the boundary cases, the index
+     * chosen for kern value should be i such that
+     *    correctionHeight[i-1] <= correction_height < correctionHeight[i]
+     * We can use the binary search algorithm of std::upper_bound(). Or, we can
+     * use the internal hb_bsearch_impl.
      */
-    unsigned int i = 0;
-    unsigned int count = heightCount;
-    while (count > 0)
-    {
-      unsigned int half = count / 2;
-      hb_position_t height = correctionHeight[i + half].get_y_value (font, this);
-      if (sign * height < sign * correction_height)
-      {
-	i += half + 1;
-	count -= half + 1;
-      } else
-	count = half;
-    }
+    unsigned int pos;
+    auto cmp = +[](const void* key, const void* p,
+                   int sign, hb_font_t* font, const MathKern* mathKern) -> int {
+      return sign * *(hb_position_t*)key - sign * ((MathValueRecord*)p)->get_y_value(font, mathKern);
+    };
+    unsigned int i = hb_bsearch_impl(&pos, correction_height, correctionHeight,
+                                     heightCount, MathValueRecord::static_size,
+                                     cmp, sign, font, this) ? pos + 1 : pos;
     return kernValue[i].get_x_value (font, this);
   }
 
diff --git a/thirdparty/harfbuzz/src/hb-ot-os2-table.hh b/thirdparty/harfbuzz/src/hb-ot-os2-table.hh
index 8c2e696f56..43b58d9bbf 100644
--- a/thirdparty/harfbuzz/src/hb-ot-os2-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-os2-table.hh
@@ -223,7 +223,7 @@ struct OS2
       }
     }
 
-    return num ? (unsigned) roundf (total_width / num) : 0;
+    return num ? (unsigned) roundf ((double) total_width / (double) num) : 0;
   }
 
   bool subset (hb_subset_context_t *c) const
@@ -284,12 +284,12 @@ struct OS2
         os2_prime->usWidthClass = width_class;
     }
 
-    if (c->plan->flags & HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES)
-      return_trace (true);
-
     os2_prime->usFirstCharIndex = hb_min (0xFFFFu, c->plan->unicodes.get_min ());
     os2_prime->usLastCharIndex  = hb_min (0xFFFFu, c->plan->unicodes.get_max ());
 
+    if (c->plan->flags & HB_SUBSET_FLAGS_NO_PRUNE_UNICODE_RANGES)
+      return_trace (true);
+
     _update_unicode_ranges (&c->plan->unicodes, os2_prime->ulUnicodeRange);
 
     return_trace (true);
diff --git a/thirdparty/harfbuzz/src/hb-ot-shape.cc b/thirdparty/harfbuzz/src/hb-ot-shape.cc
index 90f596ae79..148830022e 100644
--- a/thirdparty/harfbuzz/src/hb-ot-shape.cc
+++ b/thirdparty/harfbuzz/src/hb-ot-shape.cc
@@ -155,7 +155,7 @@ hb_ot_shape_planner_t::compile (hb_ot_shape_plan_t           &plan,
 #endif
   bool has_gpos = !disable_gpos && hb_ot_layout_has_positioning (face);
   if (false)
-    ;
+    {}
 #ifndef HB_NO_AAT_SHAPE
   /* Prefer GPOS over kerx if GSUB is present;
    * https://github.com/harfbuzz/harfbuzz/issues/3008 */
@@ -167,15 +167,16 @@ hb_ot_shape_planner_t::compile (hb_ot_shape_plan_t           &plan,
 
   if (!plan.apply_kerx && (!has_gpos_kern || !plan.apply_gpos))
   {
+    if (false) {}
 #ifndef HB_NO_AAT_SHAPE
-    if (has_kerx)
+    else if (has_kerx)
       plan.apply_kerx = true;
-    else
 #endif
 #ifndef HB_NO_OT_KERN
-    if (hb_ot_layout_has_kerning (face))
+    else if (hb_ot_layout_has_kerning (face))
       plan.apply_kern = true;
 #endif
+    else {}
   }
 
   plan.apply_fallback_kern = !(plan.apply_gpos || plan.apply_kerx || plan.apply_kern);
diff --git a/thirdparty/harfbuzz/src/hb-ot-shaper-arabic.cc b/thirdparty/harfbuzz/src/hb-ot-shaper-arabic.cc
index 72dcc84df5..d70746ed2b 100644
--- a/thirdparty/harfbuzz/src/hb-ot-shaper-arabic.cc
+++ b/thirdparty/harfbuzz/src/hb-ot-shaper-arabic.cc
@@ -560,9 +560,9 @@ apply_stch (const hb_ot_shape_plan_t *plan HB_UNUSED,
 
       DEBUG_MSG (ARABIC, nullptr, "%s stretch at (%u,%u,%u)",
 		 step == MEASURE ? "measuring" : "cutting", context, start, end);
-      DEBUG_MSG (ARABIC, nullptr, "rest of word:    count=%u width %d", start - context, w_total);
-      DEBUG_MSG (ARABIC, nullptr, "fixed tiles:     count=%d width=%d", n_fixed, w_fixed);
-      DEBUG_MSG (ARABIC, nullptr, "repeating tiles: count=%d width=%d", n_repeating, w_repeating);
+      DEBUG_MSG (ARABIC, nullptr, "rest of word:    count=%u width %" PRId32, start - context, w_total);
+      DEBUG_MSG (ARABIC, nullptr, "fixed tiles:     count=%d width=%" PRId32, n_fixed, w_fixed);
+      DEBUG_MSG (ARABIC, nullptr, "repeating tiles: count=%d width=%" PRId32, n_repeating, w_repeating);
 
       /* Number of additional times to repeat each repeating tile. */
       int n_copies = 0;
@@ -602,7 +602,7 @@ apply_stch (const hb_ot_shape_plan_t *plan HB_UNUSED,
 	  if (info[k - 1].arabic_shaping_action() == STCH_REPEATING)
 	    repeat += n_copies;
 
-	  DEBUG_MSG (ARABIC, nullptr, "appending %u copies of glyph %u; j=%u",
+	  DEBUG_MSG (ARABIC, nullptr, "appending %u copies of glyph %" PRIu32 "; j=%u",
 		     repeat, info[k - 1].codepoint, j);
 	  pos[k - 1].x_advance = 0;
 	  for (unsigned int n = 0; n < repeat; n++)
diff --git a/thirdparty/harfbuzz/src/hb-ot-stat-table.hh b/thirdparty/harfbuzz/src/hb-ot-stat-table.hh
index 58b3cd74df..e88c82a13c 100644
--- a/thirdparty/harfbuzz/src/hb-ot-stat-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-stat-table.hh
@@ -349,7 +349,7 @@ struct AxisValueFormat4
 
 struct AxisValue
 {
-  bool get_value (unsigned int axis_index) const
+  float get_value (unsigned int axis_index) const
   {
     switch (u.format)
     {
@@ -357,7 +357,7 @@ struct AxisValue
     case 2: return u.format2.get_value ();
     case 3: return u.format3.get_value ();
     case 4: return u.format4.get_axis_record (axis_index).get_value ();
-    default:return 0;
+    default:return 0.f;
     }
   }
 
@@ -485,7 +485,7 @@ struct STAT
     hb_array_t<const Offset16To<AxisValue>> axis_values = get_axis_value_offsets ();
     for (unsigned int i = 0; i < axis_values.length; i++)
     {
-      const AxisValue& axis_value = this+axis_values[i];
+      const AxisValue& axis_value = this+offsetToAxisValueOffsets+axis_values[i];
       if (axis_value.get_axis_index () == axis_index)
       {
 	if (value)
diff --git a/thirdparty/harfbuzz/src/hb-ot-tag-table.hh b/thirdparty/harfbuzz/src/hb-ot-tag-table.hh
index 032a7c866c..db92f4664a 100644
--- a/thirdparty/harfbuzz/src/hb-ot-tag-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-tag-table.hh
@@ -6,8 +6,8 @@
  *
  * on files with these headers:
  *
- * <meta name="updated_at" content="2022-09-30 11:47 PM" />
- * File-Date: 2023-08-02
+ * <meta name="updated_at" content="2023-09-30 01:21 AM" />
+ * File-Date: 2024-03-07
  */
 
 #ifndef HB_OT_TAG_TABLE_HH
@@ -31,7 +31,7 @@ static const LangTag ot_languages2[] = {
   {HB_TAG('b','i',' ',' '),	HB_TAG('B','I','S',' ')},	/* Bislama */
   {HB_TAG('b','i',' ',' '),	HB_TAG('C','P','P',' ')},	/* Bislama -> Creoles */
   {HB_TAG('b','m',' ',' '),	HB_TAG('B','M','B',' ')},	/* Bambara (Bamanankan) */
-  {HB_TAG('b','n',' ',' '),	HB_TAG('B','E','N',' ')},	/* Bengali */
+  {HB_TAG('b','n',' ',' '),	HB_TAG('B','E','N',' ')},	/* Bangla */
   {HB_TAG('b','o',' ',' '),	HB_TAG('T','I','B',' ')},	/* Tibetan */
   {HB_TAG('b','r',' ',' '),	HB_TAG('B','R','E',' ')},	/* Breton */
   {HB_TAG('b','s',' ',' '),	HB_TAG('B','O','S',' ')},	/* Bosnian */
@@ -64,7 +64,7 @@ static const LangTag ot_languages2[] = {
   {HB_TAG('f','r',' ',' '),	HB_TAG('F','R','A',' ')},	/* French */
   {HB_TAG('f','y',' ',' '),	HB_TAG('F','R','I',' ')},	/* Western Frisian -> Frisian */
   {HB_TAG('g','a',' ',' '),	HB_TAG('I','R','I',' ')},	/* Irish */
-  {HB_TAG('g','d',' ',' '),	HB_TAG('G','A','E',' ')},	/* Scottish Gaelic (Gaelic) */
+  {HB_TAG('g','d',' ',' '),	HB_TAG('G','A','E',' ')},	/* Scottish Gaelic */
   {HB_TAG('g','l',' ',' '),	HB_TAG('G','A','L',' ')},	/* Galician */
   {HB_TAG('g','n',' ',' '),	HB_TAG('G','U','A',' ')},	/* Guarani [macrolanguage] */
   {HB_TAG('g','u',' ',' '),	HB_TAG('G','U','J',' ')},	/* Gujarati */
@@ -132,7 +132,7 @@ static const LangTag ot_languages2[] = {
   {HB_TAG('m','l',' ',' '),	HB_TAG('M','A','L',' ')},	/* Malayalam -> Malayalam Traditional */
   {HB_TAG('m','l',' ',' '),	HB_TAG('M','L','R',' ')},	/* Malayalam -> Malayalam Reformed */
   {HB_TAG('m','n',' ',' '),	HB_TAG('M','N','G',' ')},	/* Mongolian [macrolanguage] */
-  {HB_TAG('m','o',' ',' '),	HB_TAG('M','O','L',' ')},	/* Moldavian (retired code) */
+  {HB_TAG('m','o',' ',' '),	HB_TAG('M','O','L',' ')},	/* Moldavian (retired code) -> Romanian (Moldova) */
   {HB_TAG('m','o',' ',' '),	HB_TAG('R','O','M',' ')},	/* Moldavian (retired code) -> Romanian */
   {HB_TAG('m','r',' ',' '),	HB_TAG('M','A','R',' ')},	/* Marathi */
   {HB_TAG('m','s',' ',' '),	HB_TAG('M','L','Y',' ')},	/* Malay [macrolanguage] */
@@ -153,7 +153,7 @@ static const LangTag ot_languages2[] = {
   {HB_TAG('o','c',' ',' '),	HB_TAG('O','C','I',' ')},	/* Occitan (post 1500) */
   {HB_TAG('o','j',' ',' '),	HB_TAG('O','J','B',' ')},	/* Ojibwa [macrolanguage] -> Ojibway */
   {HB_TAG('o','m',' ',' '),	HB_TAG('O','R','O',' ')},	/* Oromo [macrolanguage] */
-  {HB_TAG('o','r',' ',' '),	HB_TAG('O','R','I',' ')},	/* Odia (formerly Oriya) [macrolanguage] */
+  {HB_TAG('o','r',' ',' '),	HB_TAG('O','R','I',' ')},	/* Odia [macrolanguage] */
   {HB_TAG('o','s',' ',' '),	HB_TAG('O','S','S',' ')},	/* Ossetian */
   {HB_TAG('p','a',' ',' '),	HB_TAG('P','A','N',' ')},	/* Punjabi */
   {HB_TAG('p','i',' ',' '),	HB_TAG('P','A','L',' ')},	/* Pali */
@@ -166,7 +166,7 @@ static const LangTag ot_languages2[] = {
   {HB_TAG('r','o',' ',' '),	HB_TAG('R','O','M',' ')},	/* Romanian */
   {HB_TAG('r','u',' ',' '),	HB_TAG('R','U','S',' ')},	/* Russian */
   {HB_TAG('r','w',' ',' '),	HB_TAG('R','U','A',' ')},	/* Kinyarwanda */
-  {HB_TAG('s','a',' ',' '),	HB_TAG('S','A','N',' ')},	/* Sanskrit */
+  {HB_TAG('s','a',' ',' '),	HB_TAG('S','A','N',' ')},	/* Sanskrit [macrolanguage] */
   {HB_TAG('s','c',' ',' '),	HB_TAG('S','R','D',' ')},	/* Sardinian [macrolanguage] */
   {HB_TAG('s','d',' ',' '),	HB_TAG('S','N','D',' ')},	/* Sindhi */
   {HB_TAG('s','e',' ',' '),	HB_TAG('N','S','M',' ')},	/* Northern Sami */
@@ -465,6 +465,7 @@ static const LangTag ot_languages3[] = {
   {HB_TAG('c','l','d',' '),	HB_TAG('S','Y','R',' ')},	/* Chaldean Neo-Aramaic -> Syriac */
   {HB_TAG('c','l','e',' '),	HB_TAG('C','C','H','N')},	/* Lealao Chinantec -> Chinantec */
   {HB_TAG('c','l','j',' '),	HB_TAG('Q','I','N',' ')},	/* Laitu Chin -> Chin */
+  {HB_TAG('c','l','s',' '),	HB_TAG('S','A','N',' ')},	/* Classical Sanskrit -> Sanskrit */
   {HB_TAG('c','l','t',' '),	HB_TAG('Q','I','N',' ')},	/* Lautu Chin -> Chin */
   {HB_TAG('c','m','n',' '),	HB_TAG('Z','H','S',' ')},	/* Mandarin Chinese -> Chinese, Simplified */
   {HB_TAG('c','m','r',' '),	HB_TAG('Q','I','N',' ')},	/* Mro-Khimi Chin -> Chin */
@@ -637,7 +638,7 @@ static const LangTag ot_languages3[] = {
   {HB_TAG('g','a','a',' '),	HB_TAG('G','A','D',' ')},	/* Ga */
   {HB_TAG('g','a','c',' '),	HB_TAG('C','P','P',' ')},	/* Mixed Great Andamanese -> Creoles */
   {HB_TAG('g','a','d',' '),	HB_TAG_NONE	       },	/* Gaddang != Ga */
-  {HB_TAG('g','a','e',' '),	HB_TAG_NONE	       },	/* Guarequena != Scottish Gaelic (Gaelic) */
+  {HB_TAG('g','a','e',' '),	HB_TAG_NONE	       },	/* Guarequena != Scottish Gaelic */
 /*{HB_TAG('g','a','g',' '),	HB_TAG('G','A','G',' ')},*/	/* Gagauz */
   {HB_TAG('g','a','l',' '),	HB_TAG_NONE	       },	/* Galolen != Galician */
   {HB_TAG('g','a','n',' '),	HB_TAG('Z','H','S',' ')},	/* Gan Chinese -> Chinese, Simplified */
@@ -1160,7 +1161,7 @@ static const LangTag ot_languages3[] = {
   {HB_TAG('o','r','o',' '),	HB_TAG_NONE	       },	/* Orokolo != Oromo */
   {HB_TAG('o','r','r',' '),	HB_TAG('I','J','O',' ')},	/* Oruma -> Ijo */
   {HB_TAG('o','r','s',' '),	HB_TAG('M','L','Y',' ')},	/* Orang Seletar -> Malay */
-  {HB_TAG('o','r','y',' '),	HB_TAG('O','R','I',' ')},	/* Odia (formerly Oriya) */
+  {HB_TAG('o','r','y',' '),	HB_TAG('O','R','I',' ')},	/* Odia */
   {HB_TAG('o','t','w',' '),	HB_TAG('O','J','B',' ')},	/* Ottawa -> Ojibway */
   {HB_TAG('o','u','a',' '),	HB_TAG('B','B','R',' ')},	/* Tagargrent -> Berber */
   {HB_TAG('p','a','a',' '),	HB_TAG_NONE	       },	/* Papuan [collection] != Palestinian Aramaic */
@@ -1395,7 +1396,7 @@ static const LangTag ot_languages3[] = {
 /*{HB_TAG('s','n','k',' '),	HB_TAG('S','N','K',' ')},*/	/* Soninke */
   {HB_TAG('s','o','g',' '),	HB_TAG_NONE	       },	/* Sogdian != Sodo Gurage */
 /*{HB_TAG('s','o','p',' '),	HB_TAG('S','O','P',' ')},*/	/* Songe */
-  {HB_TAG('s','p','v',' '),	HB_TAG('O','R','I',' ')},	/* Sambalpuri -> Odia (formerly Oriya) */
+  {HB_TAG('s','p','v',' '),	HB_TAG('O','R','I',' ')},	/* Sambalpuri -> Odia */
   {HB_TAG('s','p','y',' '),	HB_TAG('K','A','L',' ')},	/* Sabaot -> Kalenjin */
   {HB_TAG('s','r','b',' '),	HB_TAG_NONE	       },	/* Sora != Serbian */
   {HB_TAG('s','r','c',' '),	HB_TAG('S','R','D',' ')},	/* Logudorese Sardinian -> Sardinian */
@@ -1533,6 +1534,7 @@ static const LangTag ot_languages3[] = {
   {HB_TAG('v','l','s',' '),	HB_TAG('F','L','E',' ')},	/* Vlaams -> Dutch (Flemish) */
   {HB_TAG('v','m','w',' '),	HB_TAG('M','A','K',' ')},	/* Makhuwa */
 /*{HB_TAG('v','r','o',' '),	HB_TAG('V','R','O',' ')},*/	/* Võro */
+  {HB_TAG('v','s','n',' '),	HB_TAG('S','A','N',' ')},	/* Vedic Sanskrit -> Sanskrit */
   {HB_TAG('w','a','g',' '),	HB_TAG_NONE	       },	/* Wa'ema != Wagdi */
 /*{HB_TAG('w','a','r',' '),	HB_TAG('W','A','R',' ')},*/	/* Waray (Philippines) -> Waray-Waray */
   {HB_TAG('w','b','m',' '),	HB_TAG('W','A',' ',' ')},	/* Wa */
@@ -2643,7 +2645,7 @@ out:
       /* Romanian; Moldova */
       unsigned int i;
       hb_tag_t possible_tags[] = {
-	HB_TAG('M','O','L',' '),  /* Moldavian */
+	HB_TAG('M','O','L',' '),  /* Romanian (Moldova) */
 	HB_TAG('R','O','M',' '),  /* Romanian */
       };
       for (i = 0; i < 2 && i < *count; i++)
@@ -2920,7 +2922,7 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
     return hb_language_from_string ("mn", -1);  /* Mongolian [macrolanguage] */
   case HB_TAG('M','N','K',' '):  /* Maninka */
     return hb_language_from_string ("man", -1);  /* Mandingo [macrolanguage] */
-  case HB_TAG('M','O','L',' '):  /* Moldavian */
+  case HB_TAG('M','O','L',' '):  /* Romanian (Moldova) */
     return hb_language_from_string ("ro-MD", -1);  /* Romanian; Moldova */
   case HB_TAG('M','O','N','T'):  /* Thailand Mon */
     return hb_language_from_string ("mnw-TH", -1);  /* Mon; Thailand */
@@ -2958,6 +2960,8 @@ hb_ot_ambiguous_tag_to_language (hb_tag_t tag)
     return hb_language_from_string ("ro", -1);  /* Romanian */
   case HB_TAG('R','O','Y',' '):  /* Romany */
     return hb_language_from_string ("rom", -1);  /* Romany [macrolanguage] */
+  case HB_TAG('S','A','N',' '):  /* Sanskrit */
+    return hb_language_from_string ("sa", -1);  /* Sanskrit [macrolanguage] */
   case HB_TAG('S','Q','I',' '):  /* Albanian */
     return hb_language_from_string ("sq", -1);  /* Albanian [macrolanguage] */
   case HB_TAG('S','R','B',' '):  /* Serbian */
diff --git a/thirdparty/harfbuzz/src/hb-ot-tag.cc b/thirdparty/harfbuzz/src/hb-ot-tag.cc
index 53b6b38f66..0c63756b14 100644
--- a/thirdparty/harfbuzz/src/hb-ot-tag.cc
+++ b/thirdparty/harfbuzz/src/hb-ot-tag.cc
@@ -547,7 +547,7 @@ hb_ot_tag_to_language (hb_tag_t tag)
       buf[3] = '-';
       str += 4;
     }
-    snprintf (str, 16, "x-hbot-%08x", tag);
+    snprintf (str, 16, "x-hbot-%08" PRIx32, tag);
     return hb_language_from_string (&*buf, -1);
   }
 }
diff --git a/thirdparty/harfbuzz/src/hb-ot-var-avar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-avar-table.hh
index b2e5d87a3c..9149959d79 100644
--- a/thirdparty/harfbuzz/src/hb-ot-var-avar-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-var-avar-table.hh
@@ -57,7 +57,7 @@ struct avarV2Tail
 
   protected:
   Offset32To<DeltaSetIndexMap>	varIdxMap;	/* Offset from the beginning of 'avar' table. */
-  Offset32To<VariationStore>	varStore;	/* Offset from the beginning of 'avar' table. */
+  Offset32To<ItemVariationStore>	varStore;	/* Offset from the beginning of 'avar' table. */
 
   public:
   DEFINE_SIZE_STATIC (8);
@@ -230,7 +230,7 @@ struct SegmentMaps : Array16Of<AxisValueMap>
        * duplicates here */
       if (mapping.must_include ())
         continue;
-      value_mappings.push (std::move (mapping));
+      value_mappings.push (mapping);
     }
 
     AxisValueMap m;
@@ -343,7 +343,7 @@ struct avar
     for (unsigned i = 0; i < coords_length; i++)
       coords[i] = out[i];
 
-    OT::VariationStore::destroy_cache (var_store_cache);
+    OT::ItemVariationStore::destroy_cache (var_store_cache);
 #endif
   }
 
diff --git a/thirdparty/harfbuzz/src/hb-ot-var-common.hh b/thirdparty/harfbuzz/src/hb-ot-var-common.hh
index eff6df380f..379e164059 100644
--- a/thirdparty/harfbuzz/src/hb-ot-var-common.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-var-common.hh
@@ -28,6 +28,7 @@
 
 #include "hb-ot-layout-common.hh"
 #include "hb-priority-queue.hh"
+#include "hb-subset-instancer-iup.hh"
 
 
 namespace OT {
@@ -221,9 +222,9 @@ struct DeltaSetIndexMap
 };
 
 
-struct VarStoreInstancer
+struct ItemVarStoreInstancer
 {
-  VarStoreInstancer (const VariationStore *varStore,
+  ItemVarStoreInstancer (const ItemVariationStore *varStore,
 		     const DeltaSetIndexMap *varIdxMap,
 		     hb_array_t<int> coords) :
     varStore (varStore), varIdxMap (varIdxMap), coords (coords) {}
@@ -235,7 +236,7 @@ struct VarStoreInstancer
   float operator() (uint32_t varIdx, unsigned short offset = 0) const
   { return coords ? varStore->get_delta (varIdxMap ? varIdxMap->map (VarIdx::add (varIdx, offset)) : varIdx + offset, coords) : 0; }
 
-  const VariationStore *varStore;
+  const ItemVariationStore *varStore;
   const DeltaSetIndexMap *varIdxMap;
   hb_array_t<int> coords;
 };
@@ -460,7 +461,7 @@ struct tuple_delta_t
   tuple_delta_t () = default;
   tuple_delta_t (const tuple_delta_t& o) = default;
 
-  friend void swap (tuple_delta_t& a, tuple_delta_t& b)
+  friend void swap (tuple_delta_t& a, tuple_delta_t& b) noexcept
   {
     hb_swap (a.axis_tuples, b.axis_tuples);
     hb_swap (a.indices, b.indices);
@@ -471,10 +472,10 @@ struct tuple_delta_t
     hb_swap (a.compiled_peak_coords, b.compiled_peak_coords);
   }
 
-  tuple_delta_t (tuple_delta_t&& o) : tuple_delta_t ()
+  tuple_delta_t (tuple_delta_t&& o)  noexcept : tuple_delta_t ()
   { hb_swap (*this, o); }
 
-  tuple_delta_t& operator = (tuple_delta_t&& o)
+  tuple_delta_t& operator = (tuple_delta_t&& o) noexcept
   {
     hb_swap (*this, o);
     return *this;
@@ -609,7 +610,9 @@ struct tuple_delta_t
                                  const hb_map_t& axes_old_index_tag_map,
                                  const hb_hashmap_t<const hb_vector_t<char>*, unsigned>* shared_tuples_idx_map)
   {
-    if (!compiled_deltas) return false;
+    /* compiled_deltas could be empty after iup delta optimization, we can skip
+     * compiling this tuple and return true */
+    if (!compiled_deltas) return true;
 
     unsigned cur_axis_count = axes_index_map.get_population ();
     /* allocate enough memory: 1 peak + 2 intermediate coords + fixed header size */
@@ -723,22 +726,28 @@ struct tuple_delta_t
   }
 
   bool compile_deltas ()
+  { return compile_deltas (indices, deltas_x, deltas_y, compiled_deltas); }
+
+  bool compile_deltas (const hb_vector_t<bool> &point_indices,
+                       const hb_vector_t<float> &x_deltas,
+                       const hb_vector_t<float> &y_deltas,
+                       hb_vector_t<char> &compiled_deltas /* OUT */)
   {
     hb_vector_t<int> rounded_deltas;
-    if (unlikely (!rounded_deltas.alloc (indices.length)))
+    if (unlikely (!rounded_deltas.alloc (point_indices.length)))
       return false;
 
-    for (unsigned i = 0; i < indices.length; i++)
+    for (unsigned i = 0; i < point_indices.length; i++)
     {
-      if (!indices[i]) continue;
-      int rounded_delta = (int) roundf (deltas_x[i]);
+      if (!point_indices[i]) continue;
+      int rounded_delta = (int) roundf (x_deltas.arrayZ[i]);
       rounded_deltas.push (rounded_delta);
     }
 
-    if (!rounded_deltas) return false;
+    if (!rounded_deltas) return true;
     /* allocate enough memories 3 * num_deltas */
     unsigned alloc_len = 3 * rounded_deltas.length;
-    if (deltas_y)
+    if (y_deltas)
       alloc_len *= 2;
 
     if (unlikely (!compiled_deltas.resize (alloc_len))) return false;
@@ -746,14 +755,14 @@ struct tuple_delta_t
     unsigned i = 0;
     unsigned encoded_len = encode_delta_run (i, compiled_deltas.as_array (), rounded_deltas);
 
-    if (deltas_y)
+    if (y_deltas)
     {
-      /* reuse the rounded_deltas vector, check that deltas_y have the same num of deltas as deltas_x */
+      /* reuse the rounded_deltas vector, check that y_deltas have the same num of deltas as x_deltas */
       unsigned j = 0;
-      for (unsigned idx = 0; idx < indices.length; idx++)
+      for (unsigned idx = 0; idx < point_indices.length; idx++)
       {
-        if (!indices[idx]) continue;
-        int rounded_delta = (int) roundf (deltas_y[idx]);
+        if (!point_indices[idx]) continue;
+        int rounded_delta = (int) roundf (y_deltas.arrayZ[idx]);
 
         if (j >= rounded_deltas.length) return false;
 
@@ -761,7 +770,7 @@ struct tuple_delta_t
       }
 
       if (j != rounded_deltas.length) return false;
-      /* reset i because we reuse rounded_deltas for deltas_y */
+      /* reset i because we reuse rounded_deltas for y_deltas */
       i = 0;
       encoded_len += encode_delta_run (i, compiled_deltas.as_array ().sub_array (encoded_len), rounded_deltas);
     }
@@ -1020,6 +1029,171 @@ struct tuple_delta_t
     return true;
   }
 
+  bool optimize (const contour_point_vector_t& contour_points,
+                 bool is_composite,
+                 float tolerance = 0.5f)
+  {
+    unsigned count = contour_points.length;
+    if (deltas_x.length != count ||
+        deltas_y.length != count)
+      return false;
+
+    hb_vector_t<bool> opt_indices;
+    hb_vector_t<int> rounded_x_deltas, rounded_y_deltas;
+
+    if (unlikely (!rounded_x_deltas.alloc (count) ||
+                  !rounded_y_deltas.alloc (count)))
+      return false;
+
+    for (unsigned i = 0; i < count; i++)
+    {
+      int rounded_x_delta = (int) roundf (deltas_x.arrayZ[i]);
+      int rounded_y_delta = (int) roundf (deltas_y.arrayZ[i]);
+      rounded_x_deltas.push (rounded_x_delta);
+      rounded_y_deltas.push (rounded_y_delta);
+    }
+
+    if (!iup_delta_optimize (contour_points, rounded_x_deltas, rounded_y_deltas, opt_indices, tolerance))
+      return false;
+
+    unsigned ref_count = 0;
+    for (bool ref_flag : opt_indices)
+       ref_count += ref_flag;
+
+    if (ref_count == count) return true;
+
+    hb_vector_t<float> opt_deltas_x, opt_deltas_y;
+    bool is_comp_glyph_wo_deltas = (is_composite && ref_count == 0);
+    if (is_comp_glyph_wo_deltas)
+    {
+      if (unlikely (!opt_deltas_x.resize (count) ||
+                    !opt_deltas_y.resize (count)))
+        return false;
+
+      opt_indices.arrayZ[0] = true;
+      for (unsigned i = 1; i < count; i++)
+        opt_indices.arrayZ[i] = false;
+    }
+
+    hb_vector_t<char> opt_point_data;
+    if (!compile_point_set (opt_indices, opt_point_data))
+      return false;
+    hb_vector_t<char> opt_deltas_data;
+    if (!compile_deltas (opt_indices,
+                         is_comp_glyph_wo_deltas ? opt_deltas_x : deltas_x,
+                         is_comp_glyph_wo_deltas ? opt_deltas_y : deltas_y,
+                         opt_deltas_data))
+      return false;
+
+    hb_vector_t<char> point_data;
+    if (!compile_point_set (indices, point_data))
+      return false;
+    hb_vector_t<char> deltas_data;
+    if (!compile_deltas (indices, deltas_x, deltas_y, deltas_data))
+      return false;
+
+    if (opt_point_data.length + opt_deltas_data.length < point_data.length + deltas_data.length)
+    {
+      indices.fini ();
+      indices = std::move (opt_indices);
+
+      if (is_comp_glyph_wo_deltas)
+      {
+        deltas_x.fini ();
+        deltas_x = std::move (opt_deltas_x);
+
+        deltas_y.fini ();
+        deltas_y = std::move (opt_deltas_y);
+      }
+    }
+    return !indices.in_error () && !deltas_x.in_error () && !deltas_y.in_error ();
+  }
+
+  static bool compile_point_set (const hb_vector_t<bool> &point_indices,
+                                 hb_vector_t<char>& compiled_points /* OUT */)
+  {
+    unsigned num_points = 0;
+    for (bool i : point_indices)
+      if (i) num_points++;
+
+    /* when iup optimization is enabled, num of referenced points could be 0 */
+    if (!num_points) return true;
+
+    unsigned indices_length = point_indices.length;
+    /* If the points set consists of all points in the glyph, it's encoded with a
+     * single zero byte */
+    if (num_points == indices_length)
+      return compiled_points.resize (1);
+
+    /* allocate enough memories: 2 bytes for count + 3 bytes for each point */
+    unsigned num_bytes = 2 + 3 *num_points;
+    if (unlikely (!compiled_points.resize (num_bytes, false)))
+      return false;
+
+    unsigned pos = 0;
+    /* binary data starts with the total number of reference points */
+    if (num_points < 0x80)
+      compiled_points.arrayZ[pos++] = num_points;
+    else
+    {
+      compiled_points.arrayZ[pos++] = ((num_points >> 8) | 0x80);
+      compiled_points.arrayZ[pos++] = num_points & 0xFF;
+    }
+
+    const unsigned max_run_length = 0x7F;
+    unsigned i = 0;
+    unsigned last_value = 0;
+    unsigned num_encoded = 0;
+    while (i < indices_length && num_encoded < num_points)
+    {
+      unsigned run_length = 0;
+      unsigned header_pos = pos;
+      compiled_points.arrayZ[pos++] = 0;
+
+      bool use_byte_encoding = false;
+      bool new_run = true;
+      while (i < indices_length && num_encoded < num_points &&
+             run_length <= max_run_length)
+      {
+        // find out next referenced point index
+        while (i < indices_length && !point_indices[i])
+          i++;
+
+        if (i >= indices_length) break;
+
+        unsigned cur_value = i;
+        unsigned delta = cur_value - last_value;
+
+        if (new_run)
+        {
+          use_byte_encoding = (delta <= 0xFF);
+          new_run = false;
+        }
+
+        if (use_byte_encoding && delta > 0xFF)
+          break;
+
+        if (use_byte_encoding)
+          compiled_points.arrayZ[pos++] = delta;
+        else
+        {
+          compiled_points.arrayZ[pos++] = delta >> 8;
+          compiled_points.arrayZ[pos++] = delta & 0xFF;
+        }
+        i++;
+        last_value = cur_value;
+        run_length++;
+        num_encoded++;
+      }
+
+      if (use_byte_encoding)
+        compiled_points.arrayZ[header_pos] = run_length - 1;
+      else
+        compiled_points.arrayZ[header_pos] = (run_length - 1) | 0x80;
+    }
+    return compiled_points.resize (pos, false);
+  }
+
   static float infer_delta (float target_val, float prev_val, float next_val, float prev_delta, float next_delta)
   {
     if (prev_val == next_val)
@@ -1071,41 +1245,41 @@ struct TupleVariationData
 
     private:
     /* referenced point set->compiled point data map */
-    hb_hashmap_t<const hb_vector_t<bool>*, hb_bytes_t> point_data_map;
+    hb_hashmap_t<const hb_vector_t<bool>*, hb_vector_t<char>> point_data_map;
     /* referenced point set-> count map, used in finding shared points */
     hb_hashmap_t<const hb_vector_t<bool>*, unsigned> point_set_count_map;
 
     /* empty for non-gvar tuples.
-     * shared_points_bytes is just a copy of some value in the point_data_map,
+     * shared_points_bytes is a pointer to some value in the point_data_map,
      * which will be freed during map destruction. Save it for serialization, so
      * no need to do find_shared_points () again */
-    hb_bytes_t shared_points_bytes;
+    hb_vector_t<char> *shared_points_bytes = nullptr;
 
     /* total compiled byte size as TupleVariationData format, initialized to its
      * min_size: 4 */
     unsigned compiled_byte_size = 4;
 
+    /* for gvar iup delta optimization: whether this is a composite glyph */
+    bool is_composite = false;
+
     public:
     tuple_variations_t () = default;
     tuple_variations_t (const tuple_variations_t&) = delete;
     tuple_variations_t& operator=(const tuple_variations_t&) = delete;
     tuple_variations_t (tuple_variations_t&&) = default;
     tuple_variations_t& operator=(tuple_variations_t&&) = default;
-    ~tuple_variations_t () { fini (); }
-    void fini ()
-    {
-      for (auto _ : point_data_map.values ())
-        _.fini ();
-
-      point_set_count_map.fini ();
-      tuple_vars.fini ();
-    }
+    ~tuple_variations_t () = default;
 
     explicit operator bool () const { return bool (tuple_vars); }
     unsigned get_var_count () const
     {
-      unsigned count = tuple_vars.length;
-      if (shared_points_bytes.length)
+      unsigned count = 0;
+      /* when iup delta opt is enabled, compiled_deltas could be empty and we
+       * should skip this tuple */
+      for (auto& tuple: tuple_vars)
+        if (tuple.compiled_deltas) count++;
+
+      if (shared_points_bytes && shared_points_bytes->length)
         count |= TupleVarCount::SharedPointNumbers;
       return count;
     }
@@ -1119,26 +1293,27 @@ struct TupleVariationData
                                      bool is_gvar,
                                      const hb_map_t *axes_old_index_tag_map,
                                      const hb_vector_t<unsigned> &shared_indices,
-                                     const hb_array_t<const F2DOT14> shared_tuples)
+                                     const hb_array_t<const F2DOT14> shared_tuples,
+                                     bool is_composite_glyph)
     {
       do
       {
         const HBUINT8 *p = iterator.get_serialized_data ();
         unsigned int length = iterator.current_tuple->get_data_size ();
         if (unlikely (!iterator.var_data_bytes.check_range (p, length)))
-        { fini (); return false; }
+          return false;
 
         hb_hashmap_t<hb_tag_t, Triple> axis_tuples;
         if (!iterator.current_tuple->unpack_axis_tuples (iterator.get_axis_count (), shared_tuples, axes_old_index_tag_map, axis_tuples)
             || axis_tuples.is_empty ())
-        { fini (); return false; }
+          return false;
 
         hb_vector_t<unsigned> private_indices;
         bool has_private_points = iterator.current_tuple->has_private_points ();
         const HBUINT8 *end = p + length;
         if (has_private_points &&
             !TupleVariationData::unpack_points (p, private_indices, end))
-        { fini (); return false; }
+          return false;
 
         const hb_vector_t<unsigned> &indices = has_private_points ? private_indices : shared_indices;
         bool apply_to_all = (indices.length == 0);
@@ -1148,24 +1323,24 @@ struct TupleVariationData
 
         if (unlikely (!deltas_x.resize (num_deltas, false) ||
                       !TupleVariationData::unpack_deltas (p, deltas_x, end)))
-        { fini (); return false; }
+          return false;
 
         hb_vector_t<int> deltas_y;
         if (is_gvar)
         {
           if (unlikely (!deltas_y.resize (num_deltas, false) ||
                         !TupleVariationData::unpack_deltas (p, deltas_y, end)))
-          { fini (); return false; }
+            return false;
         }
 
         tuple_delta_t var;
         var.axis_tuples = std::move (axis_tuples);
         if (unlikely (!var.indices.resize (point_count) ||
                       !var.deltas_x.resize (point_count, false)))
-        { fini (); return false; }
+          return false;
 
         if (is_gvar && unlikely (!var.deltas_y.resize (point_count, false)))
-        { fini (); return false; }
+          return false;
 
         for (unsigned i = 0; i < num_deltas; i++)
         {
@@ -1178,6 +1353,8 @@ struct TupleVariationData
         }
         tuple_vars.push (std::move (var));
       } while (iterator.move_to_next ());
+
+      is_composite = is_composite_glyph;
       return true;
     }
 
@@ -1261,7 +1438,7 @@ struct TupleVariationData
           unsigned new_len = new_vars.length + out.length;
 
           if (unlikely (!new_vars.alloc (new_len, false)))
-          { fini (); return false;}
+            return false;
 
           for (unsigned i = 0; i < out.length; i++)
             new_vars.push (std::move (out[i]));
@@ -1272,8 +1449,9 @@ struct TupleVariationData
       return true;
     }
 
-    /* merge tuple variations with overlapping tents */
-    void merge_tuple_variations ()
+    /* merge tuple variations with overlapping tents, if iup delta optimization
+     * is enabled, add default deltas to contour_points */
+    bool merge_tuple_variations (contour_point_vector_t* contour_points = nullptr)
     {
       hb_vector_t<tuple_delta_t> new_vars;
       hb_hashmap_t<const hb_hashmap_t<hb_tag_t, Triple>*, unsigned> m;
@@ -1281,7 +1459,15 @@ struct TupleVariationData
       for (const tuple_delta_t& var : tuple_vars)
       {
         /* if all axes are pinned, drop the tuple variation */
-        if (var.axis_tuples.is_empty ()) continue;
+        if (var.axis_tuples.is_empty ())
+        {
+          /* if iup_delta_optimize is enabled, add deltas to contour coords */
+          if (contour_points && !contour_points->add_deltas (var.deltas_x,
+                                                             var.deltas_y,
+                                                             var.indices))
+            return false;
+          continue;
+        }
 
         unsigned *idx;
         if (m.has (&(var.axis_tuples), &idx))
@@ -1291,98 +1477,14 @@ struct TupleVariationData
         else
         {
           new_vars.push (var);
-          m.set (&(var.axis_tuples), i);
+          if (!m.set (&(var.axis_tuples), i))
+            return false;
           i++;
         }
       }
       tuple_vars.fini ();
       tuple_vars = std::move (new_vars);
-    }
-
-    hb_bytes_t compile_point_set (const hb_vector_t<bool> &point_indices)
-    {
-      unsigned num_points = 0;
-      for (bool i : point_indices)
-        if (i) num_points++;
-
-      unsigned indices_length = point_indices.length;
-      /* If the points set consists of all points in the glyph, it's encoded with a
-       * single zero byte */
-      if (num_points == indices_length)
-      {
-        char *p = (char *) hb_calloc (1, sizeof (char));
-        if (unlikely (!p)) return hb_bytes_t ();
-
-        return hb_bytes_t (p, 1);
-      }
-
-      /* allocate enough memories: 2 bytes for count + 3 bytes for each point */
-      unsigned num_bytes = 2 + 3 *num_points;
-      char *p = (char *) hb_calloc (num_bytes, sizeof (char));
-      if (unlikely (!p)) return hb_bytes_t ();
-
-      unsigned pos = 0;
-      /* binary data starts with the total number of reference points */
-      if (num_points < 0x80)
-        p[pos++] = num_points;
-      else
-      {
-        p[pos++] = ((num_points >> 8) | 0x80);
-        p[pos++] = num_points & 0xFF;
-      }
-
-      const unsigned max_run_length = 0x7F;
-      unsigned i = 0;
-      unsigned last_value = 0;
-      unsigned num_encoded = 0;
-      while (i < indices_length && num_encoded < num_points)
-      {
-        unsigned run_length = 0;
-        unsigned header_pos = pos;
-        p[pos++] = 0;
-
-        bool use_byte_encoding = false;
-        bool new_run = true;
-        while (i < indices_length && num_encoded < num_points &&
-               run_length <= max_run_length)
-        {
-          // find out next referenced point index
-          while (i < indices_length && !point_indices[i])
-            i++;
-
-          if (i >= indices_length) break;
-
-          unsigned cur_value = i;
-          unsigned delta = cur_value - last_value;
-
-          if (new_run)
-          {
-            use_byte_encoding = (delta <= 0xFF);
-            new_run = false;
-          }
-
-          if (use_byte_encoding && delta > 0xFF)
-            break;
-
-          if (use_byte_encoding)
-            p[pos++] = delta;
-          else
-          {
-            p[pos++] = delta >> 8;
-            p[pos++] = delta & 0xFF;
-          }
-          i++;
-          last_value = cur_value;
-          run_length++;
-          num_encoded++;
-        }
-
-        if (use_byte_encoding)
-          p[header_pos] = run_length - 1;
-        else
-          p[header_pos] = (run_length - 1) | 0x80;
-      }
-      return hb_bytes_t (p, pos);
+      return true;
     }
 
     /* compile all point set and store byte data in a point_set->hb_bytes_t hashmap,
@@ -1402,11 +1504,11 @@ struct TupleVariationData
           continue;
         }
         
-        hb_bytes_t compiled_data = compile_point_set (*points_set);
-        if (unlikely (compiled_data == hb_bytes_t ()))
+        hb_vector_t<char> compiled_point_data;
+        if (!tuple_delta_t::compile_point_set (*points_set, compiled_point_data))
           return false;
         
-        if (!point_data_map.set (points_set, compiled_data) ||
+        if (!point_data_map.set (points_set, std::move (compiled_point_data)) ||
             !point_set_count_map.set (points_set, 1))
           return false;
       }
@@ -1414,31 +1516,33 @@ struct TupleVariationData
     }
 
     /* find shared points set which saves most bytes */
-    hb_bytes_t find_shared_points ()
+    void find_shared_points ()
     {
       unsigned max_saved_bytes = 0;
-      hb_bytes_t res{};
 
-      for (const auto& _ : point_data_map.iter ())
+      for (const auto& _ : point_data_map.iter_ref ())
       {
         const hb_vector_t<bool>* points_set = _.first;
         unsigned data_length = _.second.length;
+        if (!data_length) continue;
         unsigned *count;
         if (unlikely (!point_set_count_map.has (points_set, &count) ||
                       *count <= 1))
-          return hb_bytes_t ();
+        {
+          shared_points_bytes = nullptr;
+          return;
+        }
 
         unsigned saved_bytes = data_length * ((*count) -1);
         if (saved_bytes > max_saved_bytes)
         {
           max_saved_bytes = saved_bytes;
-          res = _.second;
+          shared_points_bytes = &(_.second);
         }
       }
-      return res;
     }
 
-    bool calc_inferred_deltas (contour_point_vector_t& contour_points)
+    bool calc_inferred_deltas (const contour_point_vector_t& contour_points)
     {
       for (tuple_delta_t& var : tuple_vars)
         if (!var.calc_inferred_deltas (contour_points))
@@ -1447,10 +1551,21 @@ struct TupleVariationData
       return true;
     }
 
+    bool iup_optimize (const contour_point_vector_t& contour_points)
+    {
+      for (tuple_delta_t& var : tuple_vars)
+      {
+        if (!var.optimize (contour_points, is_composite))
+          return false;
+      }
+      return true;
+    }
+
     public:
     bool instantiate (const hb_hashmap_t<hb_tag_t, Triple>& normalized_axes_location,
                       const hb_hashmap_t<hb_tag_t, TripleDistances>& axes_triple_distances,
-                      contour_point_vector_t* contour_points = nullptr)
+                      contour_point_vector_t* contour_points = nullptr,
+                      bool optimize = false)
     {
       if (!tuple_vars) return true;
       if (!change_tuple_variations_axis_limits (normalized_axes_location, axes_triple_distances))
@@ -1460,7 +1575,14 @@ struct TupleVariationData
         if (!calc_inferred_deltas (*contour_points))
           return false;
 
-      merge_tuple_variations ();
+      /* if iup delta opt is on, contour_points can't be null */
+      if (optimize && !contour_points)
+        return false;
+
+      if (!merge_tuple_variations (optimize ? contour_points : nullptr))
+        return false;
+
+      if (optimize && !iup_optimize (*contour_points)) return false;
       return !tuple_vars.in_error ();
     }
 
@@ -1475,21 +1597,27 @@ struct TupleVariationData
 
       if (use_shared_points)
       {
-        shared_points_bytes = find_shared_points ();
-        compiled_byte_size += shared_points_bytes.length;
+        find_shared_points ();
+        if (shared_points_bytes)
+          compiled_byte_size += shared_points_bytes->length;
       }
       // compile delta and tuple var header for each tuple variation
       for (auto& tuple: tuple_vars)
       {
         const hb_vector_t<bool>* points_set = &(tuple.indices);
-        hb_bytes_t *points_data;
+        hb_vector_t<char> *points_data;
         if (unlikely (!point_data_map.has (points_set, &points_data)))
           return false;
 
+        /* when iup optimization is enabled, num of referenced points could be 0
+         * and thus the compiled points bytes is empty, we should skip compiling
+         * this tuple */
+        if (!points_data->length)
+          continue;
         if (!tuple.compile_deltas ())
           return false;
 
-        unsigned points_data_length = (*points_data != shared_points_bytes) ? points_data->length : 0;
+        unsigned points_data_length = (points_data != shared_points_bytes) ? points_data->length : 0;
         if (!tuple.compile_tuple_var_header (axes_index_map, points_data_length, axes_old_index_tag_map,
                                              shared_tuples_idx_map))
           return false;
@@ -1513,18 +1641,24 @@ struct TupleVariationData
     bool serialize_var_data (hb_serialize_context_t *c, bool is_gvar) const
     {
       TRACE_SERIALIZE (this);
-      if (is_gvar)
-        shared_points_bytes.copy (c);
+      if (is_gvar && shared_points_bytes)
+      {
+        hb_bytes_t s (shared_points_bytes->arrayZ, shared_points_bytes->length);
+        s.copy (c);
+      }
 
       for (const auto& tuple: tuple_vars)
       {
         const hb_vector_t<bool>* points_set = &(tuple.indices);
-        hb_bytes_t *point_data;
+        hb_vector_t<char> *point_data;
         if (!point_data_map.has (points_set, &point_data))
           return_trace (false);
 
-        if (!is_gvar || *point_data != shared_points_bytes)
-          point_data->copy (c);
+        if (!is_gvar || point_data != shared_points_bytes)
+        {
+          hb_bytes_t s (point_data->arrayZ, point_data->length);
+          s.copy (c);
+        }
 
         tuple.compiled_deltas.as_array ().copy (c);
         if (c->in_error ()) return_trace (false);
@@ -1711,13 +1845,15 @@ struct TupleVariationData
                                    const hb_map_t *axes_old_index_tag_map,
                                    const hb_vector_t<unsigned> &shared_indices,
                                    const hb_array_t<const F2DOT14> shared_tuples,
-                                   tuple_variations_t& tuple_variations /* OUT */) const
+                                   tuple_variations_t& tuple_variations, /* OUT */
+                                   bool is_composite_glyph = false) const
   {
     return tuple_variations.create_from_tuple_var_data (iterator, tupleVarCount,
                                                         point_count, is_gvar,
                                                         axes_old_index_tag_map,
                                                         shared_indices,
-                                                        shared_tuples);
+                                                        shared_tuples,
+                                                        is_composite_glyph);
   }
 
   bool serialize (hb_serialize_context_t *c,
@@ -1831,7 +1967,7 @@ struct item_variations_t
   const hb_map_t& get_varidx_map () const
   { return varidx_map; }
 
-  bool instantiate (const VariationStore& varStore,
+  bool instantiate (const ItemVariationStore& varStore,
                     const hb_subset_plan_t *plan,
                     bool optimize=true,
                     bool use_no_variation_idx=true,
@@ -1845,7 +1981,7 @@ struct item_variations_t
   }
 
   /* keep below APIs public only for unit test: test-item-varstore */
-  bool create_from_item_varstore (const VariationStore& varStore,
+  bool create_from_item_varstore (const ItemVariationStore& varStore,
                                   const hb_map_t& axes_old_index_tag_map,
                                   const hb_array_t <const hb_inc_bimap_t> inner_maps = hb_array_t<const hb_inc_bimap_t> ())
   {
diff --git a/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh
index 1c7a1f6c1e..59aad57e37 100644
--- a/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-var-gvar-table.hh
@@ -101,10 +101,15 @@ struct glyph_variations_t
         continue;
       }
 
+      bool is_composite_glyph = false;
+#ifdef HB_EXPERIMENTAL_API
+      is_composite_glyph = plan->composite_new_gids.has (new_gid);
+#endif
       if (!p->decompile_tuple_variations (all_contour_points->length, true /* is_gvar */,
                                           iterator, &(plan->axes_old_index_tag_map),
                                           shared_indices, shared_tuples,
-                                          tuple_vars /* OUT */))
+                                          tuple_vars, /* OUT */
+                                          is_composite_glyph))
         return false;
       glyph_variations.push (std::move (tuple_vars));
     }
@@ -114,13 +119,17 @@ struct glyph_variations_t
   bool instantiate (const hb_subset_plan_t *plan)
   {
     unsigned count = plan->new_to_old_gid_list.length;
+    bool iup_optimize = false;
+#ifdef HB_EXPERIMENTAL_API
+    iup_optimize = plan->flags & HB_SUBSET_FLAGS_OPTIMIZE_IUP_DELTAS;
+#endif
     for (unsigned i = 0; i < count; i++)
     {
       hb_codepoint_t new_gid = plan->new_to_old_gid_list[i].first;
       contour_point_vector_t *all_points;
       if (!plan->new_gid_contour_points_map.has (new_gid, &all_points))
         return false;
-      if (!glyph_variations[i].instantiate (plan->axes_location, plan->axes_triple_distances, all_points))
+      if (!glyph_variations[i].instantiate (plan->axes_location, plan->axes_triple_distances, all_points, iup_optimize))
         return false;
     }
     return true;
@@ -340,7 +349,8 @@ struct gvar
                   const glyph_variations_t& glyph_vars,
                   Iterator it,
                   unsigned axis_count,
-                  unsigned num_glyphs) const
+                  unsigned num_glyphs,
+                  bool force_long_offsets) const
   {
     TRACE_SERIALIZE (this);
     gvar *out = c->allocate_min<gvar> ();
@@ -352,7 +362,7 @@ struct gvar
     out->glyphCountX = hb_min (0xFFFFu, num_glyphs);
 
     unsigned glyph_var_data_size = glyph_vars.compiled_byte_size ();
-    bool long_offset = glyph_var_data_size & ~0xFFFFu;
+    bool long_offset = glyph_var_data_size & ~0xFFFFu || force_long_offsets;
     out->flags = long_offset ? 1 : 0;
 
     HBUINT8 *glyph_var_data_offsets = c->allocate_size<HBUINT8> ((long_offset ? 4 : 2) * (num_glyphs + 1), false);
@@ -393,7 +403,12 @@ struct gvar
     unsigned axis_count = c->plan->axes_index_map.get_population ();
     unsigned num_glyphs = c->plan->num_output_glyphs ();
     auto it = hb_iter (c->plan->new_to_old_gid_list);
-    return_trace (serialize (c->serializer, glyph_vars, it, axis_count, num_glyphs));
+
+    bool force_long_offsets = false;
+#ifdef HB_EXPERIMENTAL_API
+    force_long_offsets = c->plan->flags & HB_SUBSET_FLAGS_IFTB_REQUIREMENTS;
+#endif
+    return_trace (serialize (c->serializer, glyph_vars, it, axis_count, num_glyphs, force_long_offsets));
   }
 
   bool subset (hb_subset_context_t *c) const
@@ -429,7 +444,7 @@ struct gvar
     }
 
     bool long_offset = (subset_data_size & ~0xFFFFu);
- #ifdef HB_EXPERIMENTAL_API
+#ifdef HB_EXPERIMENTAL_API
     long_offset = long_offset || (c->plan->flags & HB_SUBSET_FLAGS_IFTB_REQUIREMENTS);
 #endif
     out->flags = long_offset ? 1 : 0;
diff --git a/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh
index 53a4642d38..33a4e1a40e 100644
--- a/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-var-hvar-table.hh
@@ -188,7 +188,7 @@ struct hvarvvar_subset_plan_t
   ~hvarvvar_subset_plan_t() { fini (); }
 
   void init (const hb_array_t<const DeltaSetIndexMap *> &index_maps,
-	     const VariationStore &_var_store,
+	     const ItemVariationStore &_var_store,
 	     const hb_subset_plan_t *plan)
   {
     index_map_plans.resize (index_maps.length);
@@ -263,7 +263,7 @@ struct hvarvvar_subset_plan_t
   hb_inc_bimap_t outer_map;
   hb_vector_t<hb_inc_bimap_t> inner_maps;
   hb_vector_t<index_map_subset_plan_t> index_map_plans;
-  const VariationStore *var_store;
+  const ItemVariationStore *var_store;
 
   protected:
   hb_vector_t<hb_set_t *> inner_sets;
@@ -296,7 +296,7 @@ struct HVARVVAR
 		  rsbMap.sanitize (c, this));
   }
 
-  const VariationStore& get_var_store () const
+  const ItemVariationStore& get_var_store () const
   { return this+varStore; }
 
   void listup_index_maps (hb_vector_t<const DeltaSetIndexMap *> &index_maps) const
@@ -384,7 +384,7 @@ struct HVARVVAR
 
   float get_advance_delta_unscaled (hb_codepoint_t  glyph,
 				    const int *coords, unsigned int coord_count,
-				    VariationStore::cache_t *store_cache = nullptr) const
+				    ItemVariationStore::cache_t *store_cache = nullptr) const
   {
     uint32_t varidx = (this+advMap).map (glyph);
     return (this+varStore).get_delta (varidx,
@@ -405,7 +405,7 @@ struct HVARVVAR
   public:
   FixedVersion<>version;	/* Version of the metrics variation table
 				 * initially set to 0x00010000u */
-  Offset32To<VariationStore>
+  Offset32To<ItemVariationStore>
 		varStore;	/* Offset to item variation store table. */
   Offset32To<DeltaSetIndexMap>
 		advMap;		/* Offset to advance var-idx mapping. */
diff --git a/thirdparty/harfbuzz/src/hb-ot-var-mvar-table.hh b/thirdparty/harfbuzz/src/hb-ot-var-mvar-table.hh
index 6d69777618..1f0401d1d3 100644
--- a/thirdparty/harfbuzz/src/hb-ot-var-mvar-table.hh
+++ b/thirdparty/harfbuzz/src/hb-ot-var-mvar-table.hh
@@ -56,7 +56,7 @@ struct VariationValueRecord
 
   public:
   Tag		valueTag;	/* Four-byte tag identifying a font-wide measure. */
-  VarIdx	varIdx;		/* Outer/inner index into VariationStore item. */
+  VarIdx	varIdx;		/* Outer/inner index into ItemVariationStore item. */
 
   public:
   DEFINE_SIZE_STATIC (8);
@@ -106,7 +106,7 @@ struct MVAR
     out->valueRecordCount = valueRecordCount;
 
     item_variations_t item_vars;
-    const VariationStore& src_var_store = this+varStore;
+    const ItemVariationStore& src_var_store = this+varStore;
 
     if (!item_vars.instantiate (src_var_store, c->plan))
       return_trace (false);
@@ -159,7 +159,7 @@ protected:
   HBUINT16	valueRecordSize;/* The size in bytes of each value record —
 				 * must be greater than zero. */
   HBUINT16	valueRecordCount;/* The number of value records — may be zero. */
-  Offset16To<VariationStore>
+  Offset16To<ItemVariationStore>
 		varStore;	/* Offset to item variation store table. */
   UnsizedArrayOf<HBUINT8>
 		valuesZ;	/* Array of value records. The records must be
diff --git a/thirdparty/harfbuzz/src/hb-priority-queue.hh b/thirdparty/harfbuzz/src/hb-priority-queue.hh
index 9b962a29d9..274d5df4c5 100644
--- a/thirdparty/harfbuzz/src/hb-priority-queue.hh
+++ b/thirdparty/harfbuzz/src/hb-priority-queue.hh
@@ -163,7 +163,7 @@ struct hb_priority_queue_t
     goto repeat;
   }
 
-  void swap (unsigned a, unsigned b)
+  void swap (unsigned a, unsigned b) noexcept
   {
     assert (a < heap.length);
     assert (b < heap.length);
diff --git a/thirdparty/harfbuzz/src/hb-repacker.hh b/thirdparty/harfbuzz/src/hb-repacker.hh
index e9cd376ad3..ed40f271cc 100644
--- a/thirdparty/harfbuzz/src/hb-repacker.hh
+++ b/thirdparty/harfbuzz/src/hb-repacker.hh
@@ -239,6 +239,54 @@ bool _try_isolating_subgraphs (const hb_vector_t<graph::overflow_record_t>& over
 }
 
 static inline
+bool _resolve_shared_overflow(const hb_vector_t<graph::overflow_record_t>& overflows,
+                              int overflow_index,
+                              graph_t& sorted_graph)
+{
+  const graph::overflow_record_t& r = overflows[overflow_index];
+
+  // Find all of the parents in overflowing links that link to this
+  // same child node. We will then try duplicating the child node and
+  // re-assigning all of these parents to the duplicate.
+  hb_set_t parents;
+  parents.add(r.parent);
+  for (int i = overflow_index - 1; i >= 0; i--) {
+    const graph::overflow_record_t& r2 = overflows[i];
+    if (r2.child == r.child) {
+      parents.add(r2.parent);
+    }
+  }
+
+  unsigned result = sorted_graph.duplicate(&parents, r.child);
+  if (result == (unsigned) -1 && parents.get_population() > 2) {
+    // All links to the child are overflowing, so we can't include all
+    // in the duplication. Remove one parent from the duplication.
+    // Remove the lowest index parent, which will be the closest to the child.
+    parents.del(parents.get_min());
+    result = sorted_graph.duplicate(&parents, r.child);
+  }
+
+  if (result == (unsigned) -1) return result;
+
+  if (parents.get_population() > 1) {
+    // If the duplicated node has more than one parent pre-emptively raise it's priority to the maximum.
+    // This will place it close to the parents. Node's with only one parent, don't need this as normal overflow
+    // resolution will raise priority if needed.
+    //
+    // Reasoning: most of the parents to this child are likely at the same layer in the graph. Duplicating
+    // the child will theoretically allow it to be placed closer to it's parents. However, due to the shortest
+    // distance sort by default it's placement will remain in the same layer, thus it will remain in roughly the
+    // same position (and distance from parents) as the original child node. The overflow resolution will attempt
+    // to move nodes closer, but only for non-shared nodes. Since this node is shared, it will simply be given
+    // further duplication which defeats the attempt to duplicate with multiple parents. To fix this we
+    // pre-emptively raise priority now which allows the duplicated node to pack into the same layer as it's parents.
+    sorted_graph.vertices_[result].give_max_priority();
+  }
+
+  return result;
+}
+
+static inline
 bool _process_overflows (const hb_vector_t<graph::overflow_record_t>& overflows,
                          hb_set_t& priority_bumped_parents,
                          graph_t& sorted_graph)
@@ -254,7 +302,7 @@ bool _process_overflows (const hb_vector_t<graph::overflow_record_t>& overflows,
     {
       // The child object is shared, we may be able to eliminate the overflow
       // by duplicating it.
-      if (sorted_graph.duplicate (r.parent, r.child) == (unsigned) -1) continue;
+      if (!_resolve_shared_overflow(overflows, i, sorted_graph)) continue;
       return true;
     }
 
@@ -388,7 +436,7 @@ template<typename T>
 inline hb_blob_t*
 hb_resolve_overflows (const T& packed,
                       hb_tag_t table_tag,
-                      unsigned max_rounds = 20,
+                      unsigned max_rounds = 32,
                       bool recalculate_extensions = false) {
   graph_t sorted_graph (packed);
   if (sorted_graph.in_error ())
diff --git a/thirdparty/harfbuzz/src/hb-serialize.hh b/thirdparty/harfbuzz/src/hb-serialize.hh
index 15eccb6a09..e988451eb3 100644
--- a/thirdparty/harfbuzz/src/hb-serialize.hh
+++ b/thirdparty/harfbuzz/src/hb-serialize.hh
@@ -91,7 +91,27 @@ struct hb_serialize_context_t
     }
 #endif
 
-    friend void swap (object_t& a, object_t& b)
+    bool add_virtual_link (objidx_t objidx)
+    {
+      if (!objidx)
+        return false;
+
+      auto& link = *virtual_links.push ();
+      if (virtual_links.in_error ())
+        return false;
+
+      link.objidx = objidx;
+      // Remaining fields were previously zero'd by push():
+      // link.width = 0;
+      // link.is_signed = 0;
+      // link.whence = 0;
+      // link.position = 0;
+      // link.bias = 0;
+
+      return true;
+    }
+
+    friend void swap (object_t& a, object_t& b) noexcept
     {
       hb_swap (a.head, b.head);
       hb_swap (a.tail, b.tail);
@@ -156,9 +176,9 @@ struct hb_serialize_context_t
     object_t *next;
 
     auto all_links () const HB_AUTO_RETURN
-        (( hb_concat (this->real_links, this->virtual_links) ));
+        (( hb_concat (real_links, virtual_links) ));
     auto all_links_writer () HB_AUTO_RETURN
-        (( hb_concat (this->real_links.writer (), this->virtual_links.writer ()) ));
+        (( hb_concat (real_links.writer (), virtual_links.writer ()) ));           
   };
 
   struct snapshot_t
@@ -469,16 +489,40 @@ struct hb_serialize_context_t
 
     assert (current);
 
-    auto& link = *current->virtual_links.push ();
-    if (current->virtual_links.in_error ())
+    if (!current->add_virtual_link(objidx))
       err (HB_SERIALIZE_ERROR_OTHER);
+  }
 
-    link.width = 0;
-    link.objidx = objidx;
-    link.is_signed = 0;
-    link.whence = 0;
-    link.position = 0;
-    link.bias = 0;
+  objidx_t last_added_child_index() const {
+    if (unlikely (in_error ())) return (objidx_t) -1;
+
+    assert (current);
+    if (!bool(current->real_links)) {
+      return (objidx_t) -1;
+    }
+
+    return current->real_links[current->real_links.length - 1].objidx;
+  }
+
+  // For the current object ensure that the sub-table bytes for child objidx are always placed
+  // after the subtable bytes for any other existing children. This only ensures that the
+  // repacker will not move the target subtable before the other children
+  // (by adding virtual links). It is up to the caller to ensure the initial serialization
+  // order is correct.
+  void repack_last(objidx_t objidx) {
+    if (unlikely (in_error ())) return;
+
+    if (!objidx)
+      return;
+
+    assert (current);
+    for (auto& l : current->real_links) {
+      if (l.objidx == objidx) {
+        continue;
+      }
+
+      packed[l.objidx]->add_virtual_link(objidx);
+    }
   }
 
   template <typename T>
diff --git a/thirdparty/harfbuzz/src/hb-set.hh b/thirdparty/harfbuzz/src/hb-set.hh
index ff2a170d2d..ce69ea2c9b 100644
--- a/thirdparty/harfbuzz/src/hb-set.hh
+++ b/thirdparty/harfbuzz/src/hb-set.hh
@@ -44,10 +44,10 @@ struct hb_sparseset_t
   ~hb_sparseset_t () { fini (); }
 
   hb_sparseset_t (const hb_sparseset_t& other) : hb_sparseset_t () { set (other); }
-  hb_sparseset_t (hb_sparseset_t&& other) : hb_sparseset_t () { s = std::move (other.s); }
+  hb_sparseset_t (hb_sparseset_t&& other)  noexcept : hb_sparseset_t () { s = std::move (other.s); }
   hb_sparseset_t& operator = (const hb_sparseset_t& other) { set (other); return *this; }
-  hb_sparseset_t& operator = (hb_sparseset_t&& other) { s = std::move (other.s); return *this; }
-  friend void swap (hb_sparseset_t& a, hb_sparseset_t& b) { hb_swap (a.s, b.s); }
+  hb_sparseset_t& operator = (hb_sparseset_t&& other)  noexcept { s = std::move (other.s); return *this; }
+  friend void swap (hb_sparseset_t& a, hb_sparseset_t& b)  noexcept { hb_swap (a.s, b.s); }
 
   hb_sparseset_t (std::initializer_list<hb_codepoint_t> lst) : hb_sparseset_t ()
   {
@@ -166,7 +166,7 @@ struct hb_set_t : hb_sparseset_t<hb_bit_set_invertible_t>
   ~hb_set_t () = default;
   hb_set_t () : sparseset () {};
   hb_set_t (const hb_set_t &o) : sparseset ((sparseset &) o) {};
-  hb_set_t (hb_set_t&& o) : sparseset (std::move ((sparseset &) o)) {}
+  hb_set_t (hb_set_t&& o)  noexcept : sparseset (std::move ((sparseset &) o)) {}
   hb_set_t& operator = (const hb_set_t&) = default;
   hb_set_t& operator = (hb_set_t&&) = default;
   hb_set_t (std::initializer_list<hb_codepoint_t> lst) : sparseset (lst) {}
diff --git a/thirdparty/harfbuzz/src/hb-subset-cff2.cc b/thirdparty/harfbuzz/src/hb-subset-cff2.cc
index abc108e571..eb5cb0c625 100644
--- a/thirdparty/harfbuzz/src/hb-subset-cff2.cc
+++ b/thirdparty/harfbuzz/src/hb-subset-cff2.cc
@@ -248,7 +248,7 @@ struct cff2_subr_subsetter_t : subr_subsetter_t<cff2_subr_subsetter_t, CFF2Subrs
 struct cff2_private_blend_encoder_param_t
 {
   cff2_private_blend_encoder_param_t (hb_serialize_context_t *c,
-				      const CFF2VariationStore *varStore,
+				      const CFF2ItemVariationStore *varStore,
 				      hb_array_t<int> normalized_coords) :
     c (c), varStore (varStore), normalized_coords (normalized_coords) {}
 
@@ -284,7 +284,7 @@ struct cff2_private_blend_encoder_param_t
   unsigned ivs = 0;
   unsigned region_count = 0;
   hb_vector_t<float> scalars;
-  const	 CFF2VariationStore *varStore = nullptr;
+  const	 CFF2ItemVariationStore *varStore = nullptr;
   hb_array_t<int> normalized_coords;
 };
 
@@ -378,7 +378,7 @@ struct cff2_private_dict_blend_opset_t : dict_opset_t
 struct cff2_private_dict_op_serializer_t : op_serializer_t
 {
   cff2_private_dict_op_serializer_t (bool desubroutinize_, bool drop_hints_, bool pinned_,
-				     const CFF::CFF2VariationStore* varStore_,
+				     const CFF::CFF2ItemVariationStore* varStore_,
 				     hb_array_t<int> normalized_coords_)
     : desubroutinize (desubroutinize_), drop_hints (drop_hints_), pinned (pinned_),
       varStore (varStore_), normalized_coords (normalized_coords_) {}
@@ -416,7 +416,7 @@ struct cff2_private_dict_op_serializer_t : op_serializer_t
   const bool desubroutinize;
   const bool drop_hints;
   const bool pinned;
-  const CFF::CFF2VariationStore* varStore;
+  const CFF::CFF2ItemVariationStore* varStore;
   hb_array_t<int> normalized_coords;
 };
 
@@ -628,10 +628,10 @@ OT::cff2::accelerator_subset_t::serialize (hb_serialize_context_t *c,
   }
 
   /* variation store */
-  if (varStore != &Null (CFF2VariationStore) &&
+  if (varStore != &Null (CFF2ItemVariationStore) &&
       !plan.pinned)
   {
-    auto *dest = c->push<CFF2VariationStore> ();
+    auto *dest = c->push<CFF2ItemVariationStore> ();
     if (unlikely (!dest->serialize (c, varStore)))
     {
       c->pop_discard ();
diff --git a/thirdparty/harfbuzz/src/hb-subset-input.cc b/thirdparty/harfbuzz/src/hb-subset-input.cc
index 1e0a89a630..68a3e77788 100644
--- a/thirdparty/harfbuzz/src/hb-subset-input.cc
+++ b/thirdparty/harfbuzz/src/hb-subset-input.cc
@@ -24,6 +24,7 @@
  * Google Author(s): Garret Rieger, Rod Sheeter, Behdad Esfahbod
  */
 
+#include "hb-subset-instancer-solver.hh"
 #include "hb-subset.hh"
 #include "hb-set.hh"
 #include "hb-utf.hh"
@@ -50,7 +51,6 @@ hb_subset_input_t::hb_subset_input_t ()
     HB_TAG ('k', 'e', 'r', 'n'),
 
     // Copied from fontTools:
-    HB_TAG ('B', 'A', 'S', 'E'),
     HB_TAG ('J', 'S', 'T', 'F'),
     HB_TAG ('D', 'S', 'I', 'G'),
     HB_TAG ('E', 'B', 'D', 'T'),
@@ -418,6 +418,46 @@ hb_subset_input_keep_everything (hb_subset_input_t *input)
 
 #ifndef HB_NO_VAR
 /**
+ * hb_subset_input_pin_all_axes_to_default: (skip)
+ * @input: a #hb_subset_input_t object.
+ * @face: a #hb_face_t object.
+ *
+ * Pin all axes to default locations in the given subset input object.
+ *
+ * All axes in a font must be pinned. Additionally, `CFF2` table, if present,
+ * will be de-subroutinized.
+ *
+ * Return value: `true` if success, `false` otherwise
+ *
+ * Since: 8.3.1
+ **/
+HB_EXTERN hb_bool_t
+hb_subset_input_pin_all_axes_to_default (hb_subset_input_t  *input,
+                                         hb_face_t          *face)
+{
+  unsigned axis_count = hb_ot_var_get_axis_count (face);
+  if (!axis_count) return false;
+
+  hb_ot_var_axis_info_t *axis_infos = (hb_ot_var_axis_info_t *) hb_calloc (axis_count, sizeof (hb_ot_var_axis_info_t));
+  if (unlikely (!axis_infos)) return false;
+
+  (void) hb_ot_var_get_axis_infos (face, 0, &axis_count, axis_infos);
+
+  for (unsigned i = 0; i < axis_count; i++)
+  {
+    hb_tag_t axis_tag = axis_infos[i].tag;
+    float default_val = axis_infos[i].default_value;
+    if (!input->axes_location.set (axis_tag, Triple (default_val, default_val, default_val)))
+    {
+      hb_free (axis_infos);
+      return false;
+    }
+  }
+  hb_free (axis_infos);
+  return true;
+}
+
+/**
  * hb_subset_input_pin_axis_to_default: (skip)
  * @input: a #hb_subset_input_t object.
  * @face: a #hb_face_t object.
@@ -481,16 +521,13 @@ hb_subset_input_pin_axis_location (hb_subset_input_t  *input,
  * @input: a #hb_subset_input_t object.
  * @face: a #hb_face_t object.
  * @axis_tag: Tag of the axis
- * @axis_min_value: Minimum value of the axis variation range to set
- * @axis_max_value: Maximum value of the axis variation range to set
- * @axis_def_value: Default value of the axis variation range to set, in case of
- * null, it'll be determined automatically
+ * @axis_min_value: Minimum value of the axis variation range to set, if NaN the existing min will be used.
+ * @axis_max_value: Maximum value of the axis variation range to set  if NaN the existing max will be used.
+ * @axis_def_value: Default value of the axis variation range to set, if NaN the existing default will be used.
  *
  * Restricting the range of variation on an axis in the given subset input object.
  * New min/default/max values will be clamped if they're not within the fvar axis range.
- * If the new default value is null:
- * If the fvar axis default value is within the new range, then new default
- * value is the same as original default value.
+ *
  * If the fvar axis default value is not within the new range, the new default
  * value will be changed to the new min or max value, whichever is closer to the fvar
  * axis default.
@@ -509,21 +546,57 @@ hb_subset_input_set_axis_range (hb_subset_input_t  *input,
                                 hb_tag_t            axis_tag,
                                 float               axis_min_value,
                                 float               axis_max_value,
-                                float              *axis_def_value /* IN, maybe NULL */)
+                                float               axis_def_value)
 {
-  if (axis_min_value > axis_max_value)
-    return false;
-
   hb_ot_var_axis_info_t axis_info;
   if (!hb_ot_var_find_axis_info (face, axis_tag, &axis_info))
     return false;
 
-  float new_min_val = hb_clamp(axis_min_value, axis_info.min_value, axis_info.max_value);
-  float new_max_val = hb_clamp(axis_max_value, axis_info.min_value, axis_info.max_value);
-  float new_default_val = axis_def_value ? *axis_def_value : axis_info.default_value;
-  new_default_val = hb_clamp(new_default_val, new_min_val, new_max_val);
+  float min = !std::isnan(axis_min_value) ? axis_min_value : axis_info.min_value;
+  float max = !std::isnan(axis_max_value) ? axis_max_value : axis_info.max_value;
+  float def = !std::isnan(axis_def_value) ? axis_def_value : axis_info.default_value;
+
+  if (min > max)
+    return false;
+
+  float new_min_val = hb_clamp(min, axis_info.min_value, axis_info.max_value);
+  float new_max_val = hb_clamp(max, axis_info.min_value, axis_info.max_value);
+  float new_default_val = hb_clamp(def, new_min_val, new_max_val);
   return input->axes_location.set (axis_tag, Triple (new_min_val, new_default_val, new_max_val));
 }
+
+/**
+ * hb_subset_input_get_axis_range: (skip)
+ * @input: a #hb_subset_input_t object.
+ * @axis_tag: Tag of the axis
+ * @axis_min_value: Set to the previously configured minimum value of the axis variation range.
+ * @axis_max_value: Set to the previously configured maximum value of the axis variation range.
+ * @axis_def_value: Set to the previously configured default value of the axis variation range.
+ *
+ * Gets the axis range assigned by previous calls to hb_subset_input_set_axis_range.
+ *
+ * Return value: `true` if a range has been set for this axis tag, `false` otherwise.
+ *
+ * XSince: EXPERIMENTAL
+ **/
+HB_EXTERN hb_bool_t
+hb_subset_input_get_axis_range (hb_subset_input_t  *input,
+				hb_tag_t            axis_tag,
+				float              *axis_min_value,
+				float              *axis_max_value,
+				float              *axis_def_value)
+
+{
+  Triple* triple;
+  if (!input->axes_location.has(axis_tag, &triple)) {
+    return false;
+  }
+
+  *axis_min_value = triple->minimum;
+  *axis_def_value = triple->middle;
+  *axis_max_value = triple->maximum;
+  return true;
+}
 #endif
 #endif
 
diff --git a/thirdparty/harfbuzz/src/hb-subset-instancer-iup.cc b/thirdparty/harfbuzz/src/hb-subset-instancer-iup.cc
new file mode 100644
index 0000000000..35a964d082
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-instancer-iup.cc
@@ -0,0 +1,532 @@
+/*
+ * Copyright © 2024  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#include "hb-subset-instancer-iup.hh"
+
+/* This file is a straight port of the following:
+ *
+ * https://github.com/fonttools/fonttools/blob/main/Lib/fontTools/varLib/iup.py
+ *
+ * Where that file returns optimzied deltas vector, we return optimized
+ * referenced point indices.
+ */
+
+constexpr static unsigned MAX_LOOKBACK = 8;
+
+static void _iup_contour_bound_forced_set (const hb_array_t<const contour_point_t> contour_points,
+                                           const hb_array_t<const int> x_deltas,
+                                           const hb_array_t<const int> y_deltas,
+                                           hb_set_t& forced_set, /* OUT */
+                                           float tolerance = 0.f)
+{
+  unsigned len = contour_points.length;
+  unsigned next_i = 0;
+  for (int i = len - 1; i >= 0; i--)
+  {
+    unsigned last_i = (len + i -1) % len;
+    for (unsigned j = 0; j < 2; j++)
+    {
+      float cj, lcj, ncj;
+      int dj, ldj, ndj;
+      if (j == 0)
+      {
+        cj = contour_points.arrayZ[i].x;
+        dj = x_deltas.arrayZ[i];
+        lcj = contour_points.arrayZ[last_i].x;
+        ldj = x_deltas.arrayZ[last_i];
+        ncj = contour_points.arrayZ[next_i].x;
+        ndj = x_deltas.arrayZ[next_i];
+      }
+      else
+      {
+        cj = contour_points.arrayZ[i].y;
+        dj = y_deltas.arrayZ[i];
+        lcj = contour_points.arrayZ[last_i].y;
+        ldj = y_deltas.arrayZ[last_i];
+        ncj = contour_points.arrayZ[next_i].y;
+        ndj = y_deltas.arrayZ[next_i];
+      }
+
+      float c1, c2;
+      int d1, d2;
+      if (lcj <= ncj)
+      {
+        c1 = lcj;
+        c2 = ncj;
+        d1 = ldj;
+        d2 = ndj;
+      }
+      else
+      {
+        c1 = ncj;
+        c2 = lcj;
+        d1 = ndj;
+        d2 = ldj;
+      }
+
+      bool force = false;
+      if (c1 == c2)
+      {
+        if (abs (d1 - d2) > tolerance && abs (dj) > tolerance)
+          force = true;
+      }
+      else if (c1 <= cj && cj <= c2)
+      {
+        if (!(hb_min (d1, d2) - tolerance <= dj &&
+              dj <= hb_max (d1, d2) + tolerance))
+          force = true;
+      }
+      else
+      {
+        if (d1 != d2)
+        {
+          if (cj < c1)
+          {
+            if (abs (dj) > tolerance &&
+                abs (dj - d1) > tolerance &&
+                ((dj - tolerance < d1) != (d1 < d2)))
+                force = true;
+          }
+          else
+          {
+            if (abs (dj) > tolerance &&
+                abs (dj - d2) > tolerance &&
+                ((d2 < dj + tolerance) != (d1 < d2)))
+              force = true;
+          }
+        }
+      }
+
+      if (force)
+      {
+        forced_set.add (i);
+        break;
+      }
+    }
+    next_i = i;
+  }
+}
+
+template <typename T,
+          hb_enable_if (hb_is_trivially_copyable (T))>
+static bool rotate_array (const hb_array_t<const T>& org_array,
+                          int k,
+                          hb_vector_t<T>& out)
+{
+  unsigned n = org_array.length;
+  if (!n) return true;
+  if (unlikely (!out.resize (n, false)))
+    return false;
+
+  unsigned item_size = hb_static_size (T);
+  if (k < 0)
+    k = n - (-k) % n;
+  else
+    k %= n;
+
+  hb_memcpy ((void *) out.arrayZ, (const void *) (org_array.arrayZ + n - k), k * item_size);
+  hb_memcpy ((void *) (out.arrayZ + k), (const void *) org_array.arrayZ, (n - k) * item_size);
+  return true;
+}
+
+static bool rotate_set (const hb_set_t& org_set,
+                        int k,
+                        unsigned n,
+                        hb_set_t& out)
+{
+  if (!n) return false;
+  k %= n;
+  if (k < 0)
+    k = n + k;
+
+  if (k == 0)
+  {
+    out.set (org_set);
+  }
+  else
+  {
+    for (auto v : org_set)
+      out.add ((v + k) % n);
+  }
+  return !out.in_error ();
+}
+
+/* Given two reference coordinates (start and end of contour_points array),
+ * output interpolated deltas for points in between */
+static bool _iup_segment (const hb_array_t<const contour_point_t> contour_points,
+                          const hb_array_t<const int> x_deltas,
+                          const hb_array_t<const int> y_deltas,
+                          const contour_point_t& p1, const contour_point_t& p2,
+                          int p1_dx, int p2_dx,
+                          int p1_dy, int p2_dy,
+                          hb_vector_t<float>& interp_x_deltas, /* OUT */
+                          hb_vector_t<float>& interp_y_deltas /* OUT */)
+{
+  unsigned n = contour_points.length;
+  if (unlikely (!interp_x_deltas.resize (n, false) ||
+                !interp_y_deltas.resize (n, false)))
+    return false;
+
+  for (unsigned j = 0; j < 2; j++)
+  {
+    float x1, x2, d1, d2;
+    float *out;
+    if (j == 0)
+    {
+      x1 = p1.x;
+      x2 = p2.x;
+      d1 = p1_dx;
+      d2 = p2_dx;
+      out = interp_x_deltas.arrayZ;
+    }
+    else
+    {
+      x1 = p1.y;
+      x2 = p2.y;
+      d1 = p1_dy;
+      d2 = p2_dy;
+      out = interp_y_deltas.arrayZ;
+    }
+
+    if (x1 == x2)
+    {
+      if (d1 == d2)
+      {
+        for (unsigned i = 0; i < n; i++)
+          out[i] = d1;
+      }
+      else
+      {
+        for (unsigned i = 0; i < n; i++)
+          out[i] = 0.f;
+      }
+      continue;
+    }
+
+    if (x1 > x2)
+    {
+      hb_swap (x1, x2);
+      hb_swap (d1, d2);
+    }
+
+    float scale = (d2 - d1) / (x2 - x1);
+    for (unsigned i = 0; i < n; i++)
+    {
+      float x = j == 0 ? contour_points.arrayZ[i].x : contour_points.arrayZ[i].y;
+      float d;
+      if (x <= x1)
+        d = d1;
+      else if (x >= x2)
+        d = d2;
+      else
+        d = d1 + (x - x1) * scale;
+
+      out[i] = d;
+    }
+  }
+  return true;
+}
+
+static bool _can_iup_in_between (const hb_array_t<const contour_point_t> contour_points,
+                                 const hb_array_t<const int> x_deltas,
+                                 const hb_array_t<const int> y_deltas,
+                                 const contour_point_t& p1, const contour_point_t& p2,
+                                 int p1_dx, int p2_dx,
+                                 int p1_dy, int p2_dy,
+                                 float tolerance)
+{
+  hb_vector_t<float> interp_x_deltas, interp_y_deltas;
+  if (!_iup_segment (contour_points, x_deltas, y_deltas,
+                     p1, p2, p1_dx, p2_dx, p1_dy, p2_dy,
+                     interp_x_deltas, interp_y_deltas))
+    return false;
+
+  unsigned num = contour_points.length;
+
+  for (unsigned i = 0; i < num; i++)
+  {
+    float dx = x_deltas.arrayZ[i] - interp_x_deltas.arrayZ[i];
+    float dy = y_deltas.arrayZ[i] - interp_y_deltas.arrayZ[i];
+  
+    if (sqrtf ((float)dx * dx + (float)dy * dy) > tolerance)
+      return false;
+  }
+  return true;
+}
+
+static bool _iup_contour_optimize_dp (const contour_point_vector_t& contour_points,
+                                      const hb_vector_t<int>& x_deltas,
+                                      const hb_vector_t<int>& y_deltas,
+                                      const hb_set_t& forced_set,
+                                      float tolerance,
+                                      unsigned lookback,
+                                      hb_vector_t<unsigned>& costs, /* OUT */
+                                      hb_vector_t<int>& chain /* OUT */)
+{
+  unsigned n = contour_points.length;
+  if (unlikely (!costs.resize (n, false) ||
+                !chain.resize (n, false)))
+    return false;
+
+  lookback = hb_min (lookback, MAX_LOOKBACK);
+
+  for (unsigned i = 0; i < n; i++)
+  {
+    unsigned best_cost = (i == 0 ? 1 : costs.arrayZ[i-1] + 1);
+    
+    costs.arrayZ[i] = best_cost;
+    chain.arrayZ[i] = (i == 0 ? -1 : i - 1);
+
+    if (i > 0 && forced_set.has (i - 1))
+      continue;
+
+    int lookback_index = hb_max ((int) i - (int) lookback + 1, -1);
+    for (int j = i - 2; j >= lookback_index; j--)
+    {
+      unsigned cost = j == -1 ? 1 : costs.arrayZ[j] + 1;
+      /* num points between i and j */
+      unsigned num_points = i - j - 1;
+      unsigned p1 = (j == -1 ? n - 1 : j);
+      if (cost < best_cost &&
+          _can_iup_in_between (contour_points.as_array ().sub_array (j + 1, num_points),
+                               x_deltas.as_array ().sub_array (j + 1, num_points),
+                               y_deltas.as_array ().sub_array (j + 1, num_points),
+                               contour_points.arrayZ[p1], contour_points.arrayZ[i],
+                               x_deltas.arrayZ[p1], x_deltas.arrayZ[i],
+                               y_deltas.arrayZ[p1], y_deltas.arrayZ[i],
+                               tolerance))
+      {
+        best_cost = cost;
+        costs.arrayZ[i] = best_cost;
+        chain.arrayZ[i] = j;
+      }
+
+      if (j > 0 && forced_set.has (j))
+        break;
+    }
+  }
+  return true;
+}
+
+static bool _iup_contour_optimize (const hb_array_t<const contour_point_t> contour_points,
+                                   const hb_array_t<const int> x_deltas,
+                                   const hb_array_t<const int> y_deltas,
+                                   hb_array_t<bool> opt_indices, /* OUT */
+                                   float tolerance = 0.f)
+{
+  unsigned n = contour_points.length;
+  if (opt_indices.length != n ||
+      x_deltas.length != n ||
+      y_deltas.length != n)
+    return false;
+
+  bool all_within_tolerance = true;
+  for (unsigned i = 0; i < n; i++)
+  {
+    int dx = x_deltas.arrayZ[i];
+    int dy = y_deltas.arrayZ[i];
+    if (sqrtf ((float)dx * dx + (float)dy * dy) > tolerance)
+    {
+      all_within_tolerance = false;
+      break;
+    }
+  }
+
+  /* If all are within tolerance distance, do nothing, opt_indices is
+   * initilized to false */
+  if (all_within_tolerance)
+    return true;
+
+  /* If there's exactly one point, return it */
+  if (n == 1)
+  {
+    opt_indices.arrayZ[0] = true;
+    return true;
+  }
+
+  /* If all deltas are exactly the same, return just one (the first one) */
+  bool all_deltas_are_equal = true;
+  for (unsigned i = 1; i < n; i++)
+    if (x_deltas.arrayZ[i] != x_deltas.arrayZ[0] ||
+        y_deltas.arrayZ[i] != y_deltas.arrayZ[0])
+    {
+      all_deltas_are_equal = false;
+      break;
+    }
+
+  if (all_deltas_are_equal)
+  {
+    opt_indices.arrayZ[0] = true;
+    return true;
+  }
+
+  /* else, solve the general problem using Dynamic Programming */
+  hb_set_t forced_set;
+  _iup_contour_bound_forced_set (contour_points, x_deltas, y_deltas, forced_set, tolerance);
+
+  if (!forced_set.is_empty ())
+  {
+    int k = n - 1 - forced_set.get_max ();
+    if (k < 0)
+      return false;
+
+    hb_vector_t<int> rot_x_deltas, rot_y_deltas;
+    contour_point_vector_t rot_points;
+    hb_set_t rot_forced_set;
+    if (!rotate_array (contour_points, k, rot_points) ||
+        !rotate_array (x_deltas, k, rot_x_deltas) ||
+        !rotate_array (y_deltas, k, rot_y_deltas) ||
+        !rotate_set (forced_set, k, n, rot_forced_set))
+      return false;
+
+    hb_vector_t<unsigned> costs;
+    hb_vector_t<int> chain;
+
+    if (!_iup_contour_optimize_dp (rot_points, rot_x_deltas, rot_y_deltas,
+                                   rot_forced_set, tolerance, n,
+                                   costs, chain))
+      return false;
+
+    hb_set_t solution;
+    int index = n - 1;
+    while (index != -1)
+    {
+      solution.add (index);
+      index = chain.arrayZ[index];
+    }
+
+    if (solution.is_empty () ||
+        forced_set.get_population () > solution.get_population ())
+      return false;
+
+    for (unsigned i : solution)
+      opt_indices.arrayZ[i] = true;
+
+    hb_vector_t<bool> rot_indices;
+    const hb_array_t<const bool> opt_indices_array (opt_indices.arrayZ, opt_indices.length);
+    rotate_array (opt_indices_array, -k, rot_indices);
+
+    for (unsigned i = 0; i < n; i++)
+      opt_indices.arrayZ[i] = rot_indices.arrayZ[i];
+  }
+  else
+  {
+    hb_vector_t<int> repeat_x_deltas, repeat_y_deltas;
+    contour_point_vector_t repeat_points;
+
+    if (unlikely (!repeat_x_deltas.resize (n * 2, false) ||
+                  !repeat_y_deltas.resize (n * 2, false) ||
+                  !repeat_points.resize (n * 2, false)))
+      return false;
+
+    unsigned contour_point_size = hb_static_size (contour_point_t);
+    for (unsigned i = 0; i < n; i++)
+    {
+      hb_memcpy ((void *) repeat_x_deltas.arrayZ, (const void *) x_deltas.arrayZ, n * sizeof (float));
+      hb_memcpy ((void *) (repeat_x_deltas.arrayZ + n), (const void *) x_deltas.arrayZ, n * sizeof (float));
+
+      hb_memcpy ((void *) repeat_y_deltas.arrayZ, (const void *) y_deltas.arrayZ, n * sizeof (float));
+      hb_memcpy ((void *) (repeat_y_deltas.arrayZ + n), (const void *) y_deltas.arrayZ, n * sizeof (float));
+
+      hb_memcpy ((void *) repeat_points.arrayZ, (const void *) contour_points.arrayZ, n * contour_point_size);
+      hb_memcpy ((void *) (repeat_points.arrayZ + n), (const void *) contour_points.arrayZ, n * contour_point_size);
+    }
+
+    hb_vector_t<unsigned> costs;
+    hb_vector_t<int> chain;
+    if (!_iup_contour_optimize_dp (repeat_points, repeat_x_deltas, repeat_y_deltas,
+                                   forced_set, tolerance, n,
+                                   costs, chain))
+      return false;
+
+    unsigned best_cost = n + 1;
+    int len = costs.length;
+    hb_set_t best_sol;
+    for (int start = n - 1; start < len; start++)
+    {
+      hb_set_t solution;
+      int i = start;
+      int lookback = start - (int) n;
+      while (i > lookback)
+      {
+        solution.add (i % n);
+        i = chain.arrayZ[i];
+      }
+      if (i == lookback)
+      {
+        unsigned cost_i = i < 0 ? 0 : costs.arrayZ[i];
+        unsigned cost = costs.arrayZ[start] - cost_i;
+        if (cost <= best_cost)
+        {
+          best_sol.set (solution);
+          best_cost = cost;
+        }
+      }
+    }
+
+    for (unsigned i = 0; i < n; i++)
+      if (best_sol.has (i))
+        opt_indices.arrayZ[i] = true;
+  }
+  return true;
+}
+
+bool iup_delta_optimize (const contour_point_vector_t& contour_points,
+                         const hb_vector_t<int>& x_deltas,
+                         const hb_vector_t<int>& y_deltas,
+                         hb_vector_t<bool>& opt_indices, /* OUT */
+                         float tolerance)
+{
+  if (!opt_indices.resize (contour_points.length))
+      return false;
+
+  hb_vector_t<unsigned> end_points;
+  unsigned count = contour_points.length;
+  if (unlikely (!end_points.alloc (count)))
+    return false;
+
+  for (unsigned i = 0; i < count - 4; i++)
+    if (contour_points.arrayZ[i].is_end_point)
+      end_points.push (i);
+
+  /* phantom points */
+  for (unsigned i = count - 4; i < count; i++)
+    end_points.push (i);
+
+  if (end_points.in_error ()) return false;
+
+  unsigned start = 0;
+  for (unsigned end : end_points)
+  {
+    unsigned len = end - start + 1;
+    if (!_iup_contour_optimize (contour_points.as_array ().sub_array (start, len),
+                                x_deltas.as_array ().sub_array (start, len),
+                                y_deltas.as_array ().sub_array (start, len),
+                                opt_indices.as_array ().sub_array (start, len),
+                                tolerance))
+      return false;
+    start = end + 1;
+  }
+  return true;
+}
diff --git a/thirdparty/harfbuzz/src/hb-subset-instancer-iup.hh b/thirdparty/harfbuzz/src/hb-subset-instancer-iup.hh
new file mode 100644
index 0000000000..7eac5935a4
--- /dev/null
+++ b/thirdparty/harfbuzz/src/hb-subset-instancer-iup.hh
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2024  Google, Inc.
+ *
+ *  This is part of HarfBuzz, a text shaping library.
+ *
+ * Permission is hereby granted, without written agreement and without
+ * license or royalty fees, to use, copy, modify, and distribute this
+ * software and its documentation for any purpose, provided that the
+ * above copyright notice and the following two paragraphs appear in
+ * all copies of this software.
+ *
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
+ * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
+ * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
+ * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
+ * DAMAGE.
+ *
+ * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
+ * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
+ * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
+ * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
+ */
+
+#ifndef HB_SUBSET_INSTANCER_IUP_HH
+#define HB_SUBSET_INSTANCER_IUP_HH
+
+#include "hb-subset-plan.hh"
+/* given contour points and deltas, optimize a set of referenced points within error
+ * tolerance. Returns optimized referenced point indices */
+HB_INTERNAL bool iup_delta_optimize (const contour_point_vector_t& contour_points,
+                                     const hb_vector_t<int>& x_deltas,
+                                     const hb_vector_t<int>& y_deltas,
+                                     hb_vector_t<bool>& opt_indices, /* OUT */
+                                     float tolerance = 0.f);
+
+#endif /* HB_SUBSET_INSTANCER_IUP_HH */
diff --git a/thirdparty/harfbuzz/src/hb-subset-instancer-solver.cc b/thirdparty/harfbuzz/src/hb-subset-instancer-solver.cc
index 4876bc4379..70783c0a0d 100644
--- a/thirdparty/harfbuzz/src/hb-subset-instancer-solver.cc
+++ b/thirdparty/harfbuzz/src/hb-subset-instancer-solver.cc
@@ -256,7 +256,10 @@ _solve (Triple tent, Triple axisLimit, bool negative = false)
      */
     float newUpper = peak + (1 - gain) * (upper - peak);
     assert (axisMax <= newUpper);  // Because outGain > gain
-    if (newUpper <= axisDef + (axisMax - axisDef) * 2)
+    /* Disabled because ots doesn't like us:
+     * https://github.com/fonttools/fonttools/issues/3350 */
+    
+    if (false && (newUpper <= axisDef + (axisMax - axisDef) * 2))
     {
       upper = newUpper;
       if (!negative && axisDef + (axisMax - axisDef) * MAX_F2DOT14 < upper)
diff --git a/thirdparty/harfbuzz/src/hb-subset-plan-member-list.hh b/thirdparty/harfbuzz/src/hb-subset-plan-member-list.hh
index 71da80e387..74416b92f9 100644
--- a/thirdparty/harfbuzz/src/hb-subset-plan-member-list.hh
+++ b/thirdparty/harfbuzz/src/hb-subset-plan-member-list.hh
@@ -140,6 +140,15 @@ HB_SUBSET_PLAN_MEMBER (mutable hb_vector_t<unsigned>, bounds_height_vec)
 //map: new_gid -> contour points vector
 HB_SUBSET_PLAN_MEMBER (mutable hb_hashmap_t E(<hb_codepoint_t, contour_point_vector_t>), new_gid_contour_points_map)
 
+//new gids set for composite glyphs
+HB_SUBSET_PLAN_MEMBER (hb_set_t, composite_new_gids)
+
+//Old BASE item variation index -> (New varidx, 0) mapping
+HB_SUBSET_PLAN_MEMBER (hb_hashmap_t E(<unsigned, hb_pair_t E(<unsigned, int>)>), base_variation_idx_map)
+
+//BASE table varstore retained varidx mapping
+HB_SUBSET_PLAN_MEMBER (hb_vector_t<hb_inc_bimap_t>, base_varstore_inner_maps)
+
 #ifdef HB_EXPERIMENTAL_API
 // name table overrides map: hb_ot_name_record_ids_t-> name string new value or
 // None to indicate should remove
diff --git a/thirdparty/harfbuzz/src/hb-subset-plan.cc b/thirdparty/harfbuzz/src/hb-subset-plan.cc
index 5786223196..068fddaedd 100644
--- a/thirdparty/harfbuzz/src/hb-subset-plan.cc
+++ b/thirdparty/harfbuzz/src/hb-subset-plan.cc
@@ -32,6 +32,7 @@
 
 #include "hb-ot-cmap-table.hh"
 #include "hb-ot-glyf-table.hh"
+#include "hb-ot-layout-base-table.hh"
 #include "hb-ot-layout-gdef-table.hh"
 #include "hb-ot-layout-gpos-table.hh"
 #include "hb-ot-layout-gsub-table.hh"
@@ -431,6 +432,52 @@ _collect_layout_variation_indices (hb_subset_plan_t* plan)
   gdef.destroy ();
   gpos.destroy ();
 }
+
+#ifndef HB_NO_BASE
+/* used by BASE table only, delta is always set to 0 in the output map */
+static inline void
+_remap_variation_indices (const hb_set_t& indices,
+                          unsigned subtable_count,
+                          hb_hashmap_t<unsigned, hb_pair_t<unsigned, int>>& variation_idx_delta_map /* OUT */)
+{
+  unsigned new_major = 0, new_minor = 0;
+  unsigned last_major = (indices.get_min ()) >> 16;
+  for (unsigned idx : indices)
+  {
+    uint16_t major = idx >> 16;
+    if (major >= subtable_count) break;
+    if (major != last_major)
+    {
+      new_minor = 0;
+      ++new_major;
+    }
+
+    unsigned new_idx = (new_major << 16) + new_minor;
+    variation_idx_delta_map.set (idx, hb_pair_t<unsigned, int> (new_idx, 0));
+    ++new_minor;
+    last_major = major;
+  }
+}
+
+static inline void
+_collect_base_variation_indices (hb_subset_plan_t* plan)
+{
+  hb_blob_ptr_t<OT::BASE> base = plan->source_table<OT::BASE> ();
+  if (!base->has_var_store ())
+  {
+    base.destroy ();
+    return;
+  }
+
+  hb_set_t varidx_set;
+  base->collect_variation_indices (plan, varidx_set);
+  unsigned subtable_count = base->get_var_store ().get_sub_table_count ();
+  base.destroy ();
+
+  _remap_variation_indices (varidx_set, subtable_count, plan->base_variation_idx_map);
+  _generate_varstore_inner_maps (varidx_set, subtable_count, plan->base_varstore_inner_maps);
+}
+#endif
 #endif
 
 static inline void
@@ -994,8 +1041,8 @@ _update_instance_metrics_map_from_cff2 (hb_subset_plan_t *plan)
   OT::cff2::accelerator_t cff2 (plan->source);
   if (!cff2.is_valid ()) return;
 
-  hb_font_t *font = nullptr;
-  if (unlikely (!plan->check_success (font = _get_hb_font_with_variations (plan))))
+  hb_font_t *font = _get_hb_font_with_variations (plan);
+  if (unlikely (!plan->check_success (font != nullptr)))
   {
     hb_font_destroy (font);
     return;
@@ -1073,8 +1120,8 @@ _update_instance_metrics_map_from_cff2 (hb_subset_plan_t *plan)
 static bool
 _get_instance_glyphs_contour_points (hb_subset_plan_t *plan)
 {
-  /* contour_points vector only needed for updating gvar table (infer delta)
-   * during partial instancing */
+  /* contour_points vector only needed for updating gvar table (infer delta and
+   * iup delta optimization) during partial instancing */
   if (plan->user_axes_location.is_empty () || plan->all_axes_pinned)
     return true;
 
@@ -1092,10 +1139,17 @@ _get_instance_glyphs_contour_points (hb_subset_plan_t *plan)
     }
 
     hb_codepoint_t old_gid = _.second;
-    if (unlikely (!glyf.glyph_for_gid (old_gid).get_all_points_without_var (plan->source, all_points)))
+    auto glyph = glyf.glyph_for_gid (old_gid);
+    if (unlikely (!glyph.get_all_points_without_var (plan->source, all_points)))
       return false;
     if (unlikely (!plan->new_gid_contour_points_map.set (new_gid, all_points)))
       return false;
+
+#ifdef HB_EXPERIMENTAL_API
+    /* composite new gids are only needed by iup delta optimization */
+    if ((plan->flags & HB_SUBSET_FLAGS_OPTIMIZE_IUP_DELTAS) && glyph.is_composite ())
+      plan->composite_new_gids.add (new_gid);
+#endif
   }
   return true;
 }
@@ -1205,6 +1259,13 @@ hb_subset_plan_t::hb_subset_plan_t (hb_face_t *face,
   if (!drop_tables.has (HB_OT_TAG_GDEF))
     _remap_used_mark_sets (this, used_mark_sets_map);
 
+#ifndef HB_NO_VAR
+#ifndef HB_NO_BASE
+  if (!drop_tables.has (HB_OT_TAG_BASE))
+    _collect_base_variation_indices (this);
+#endif
+#endif
+
   if (unlikely (in_error ()))
     return;
 
diff --git a/thirdparty/harfbuzz/src/hb-subset-plan.hh b/thirdparty/harfbuzz/src/hb-subset-plan.hh
index 1f19a58c1e..19a9fa6918 100644
--- a/thirdparty/harfbuzz/src/hb-subset-plan.hh
+++ b/thirdparty/harfbuzz/src/hb-subset-plan.hh
@@ -78,6 +78,13 @@ struct contour_point_t
 	  y  = x * matrix[1] + y * matrix[3];
     x  = x_;
   }
+
+  void add_delta (float delta_x, float delta_y)
+  {
+    x += delta_x;
+    y += delta_y;
+  }
+
   HB_ALWAYS_INLINE
   void translate (const contour_point_t &p) { x += p.x; y += p.y; }
 
@@ -99,6 +106,22 @@ struct contour_point_vector_t : hb_vector_t<contour_point_t>
     unsigned count = a.length;
     hb_memcpy (arrayZ, a.arrayZ, count * sizeof (arrayZ[0]));
   }
+
+  bool add_deltas (const hb_vector_t<float> deltas_x,
+                   const hb_vector_t<float> deltas_y,
+                   const hb_vector_t<bool> indices)
+  {
+    if (indices.length != deltas_x.length ||
+        indices.length != deltas_y.length)
+      return false;
+
+    for (unsigned i = 0; i < indices.length; i++)
+    {
+      if (!indices.arrayZ[i]) continue;
+      arrayZ[i].add_delta (deltas_x.arrayZ[i], deltas_y.arrayZ[i]);
+    }
+    return true;
+  }
 };
 
 namespace OT {
@@ -147,7 +170,7 @@ struct hb_subset_plan_t
   bool gsub_insert_catch_all_feature_variation_rec;
   bool gpos_insert_catch_all_feature_variation_rec;
 
-  // whether GDEF VarStore is retained
+  // whether GDEF ItemVariationStore is retained
   mutable bool has_gdef_varstore;
 
 #define HB_SUBSET_PLAN_MEMBER(Type, Name) Type Name;
diff --git a/thirdparty/harfbuzz/src/hb-subset.cc b/thirdparty/harfbuzz/src/hb-subset.cc
index 06e77dd8eb..f10ef54dbd 100644
--- a/thirdparty/harfbuzz/src/hb-subset.cc
+++ b/thirdparty/harfbuzz/src/hb-subset.cc
@@ -48,6 +48,7 @@
 #include "hb-ot-cff2-table.hh"
 #include "hb-ot-vorg-table.hh"
 #include "hb-ot-name-table.hh"
+#include "hb-ot-layout-base-table.hh"
 #include "hb-ot-layout-gsub-table.hh"
 #include "hb-ot-layout-gpos-table.hh"
 #include "hb-ot-var-avar-table.hh"
@@ -503,6 +504,7 @@ _subset_table (hb_subset_plan_t *plan,
   case HB_OT_TAG_CBLC: return _subset<const OT::CBLC> (plan, buf);
   case HB_OT_TAG_CBDT: return true; /* skip CBDT, handled by CBLC */
   case HB_OT_TAG_MATH: return _subset<const OT::MATH> (plan, buf);
+  case HB_OT_TAG_BASE: return _subset<const OT::BASE> (plan, buf);
 
 #ifndef HB_NO_SUBSET_CFF
   case HB_OT_TAG_CFF1: return _subset<const OT::cff1> (plan, buf);
@@ -548,6 +550,7 @@ _subset_table (hb_subset_plan_t *plan,
     }
 #endif
     return _passthrough (plan, tag);
+
   default:
     if (plan->flags & HB_SUBSET_FLAGS_PASSTHROUGH_UNRECOGNIZED)
       return _passthrough (plan, tag);
diff --git a/thirdparty/harfbuzz/src/hb-subset.h b/thirdparty/harfbuzz/src/hb-subset.h
index d79e7f762a..73dcae4660 100644
--- a/thirdparty/harfbuzz/src/hb-subset.h
+++ b/thirdparty/harfbuzz/src/hb-subset.h
@@ -76,6 +76,8 @@ typedef struct hb_subset_plan_t hb_subset_plan_t;
  * @HB_SUBSET_FLAGS_IFTB_REQUIREMENTS: If set enforce requirements on the output subset
  * to allow it to be used with incremental font transfer IFTB patches. Primarily,
  * this forces all outline data to use long (32 bit) offsets. Since: EXPERIMENTAL
+ * @HB_SUBSET_FLAGS_OPTIMIZE_IUP_DELTAS: If set perform IUP delta optimization on the
+ * remaining gvar table's deltas. Since: EXPERIMENTAL
  *
  * List of boolean properties that can be configured on the subset input.
  *
@@ -95,6 +97,7 @@ typedef enum { /*< flags >*/
   HB_SUBSET_FLAGS_NO_LAYOUT_CLOSURE =        0x00000200u,
 #ifdef HB_EXPERIMENTAL_API
   HB_SUBSET_FLAGS_IFTB_REQUIREMENTS       =  0x00000400u,
+  HB_SUBSET_FLAGS_OPTIMIZE_IUP_DELTAS	  =  0x00000800u,
 #endif
 } hb_subset_flags_t;
 
@@ -171,6 +174,10 @@ hb_subset_input_set_flags (hb_subset_input_t *input,
 			   unsigned value);
 
 HB_EXTERN hb_bool_t
+hb_subset_input_pin_all_axes_to_default (hb_subset_input_t  *input,
+					 hb_face_t          *face);
+
+HB_EXTERN hb_bool_t
 hb_subset_input_pin_axis_to_default (hb_subset_input_t  *input,
 				     hb_face_t          *face,
 				     hb_tag_t            axis_tag);
@@ -183,12 +190,19 @@ hb_subset_input_pin_axis_location (hb_subset_input_t  *input,
 
 #ifdef HB_EXPERIMENTAL_API
 HB_EXTERN hb_bool_t
+hb_subset_input_get_axis_range (hb_subset_input_t  *input,
+				hb_tag_t            axis_tag,
+				float              *axis_min_value,
+				float              *axis_max_value,
+				float              *axis_def_value);
+
+HB_EXTERN hb_bool_t
 hb_subset_input_set_axis_range (hb_subset_input_t  *input,
 				hb_face_t          *face,
 				hb_tag_t            axis_tag,
 				float               axis_min_value,
 				float               axis_max_value,
-				float              *axis_def_value);
+				float               axis_def_value);
 
 HB_EXTERN hb_bool_t
 hb_subset_input_override_name_table (hb_subset_input_t  *input,
diff --git a/thirdparty/harfbuzz/src/hb-vector.hh b/thirdparty/harfbuzz/src/hb-vector.hh
index dfe1b7d1c7..c0cc7063ff 100644
--- a/thirdparty/harfbuzz/src/hb-vector.hh
+++ b/thirdparty/harfbuzz/src/hb-vector.hh
@@ -78,7 +78,7 @@ struct hb_vector_t
     if (unlikely (in_error ())) return;
     copy_array (o);
   }
-  hb_vector_t (hb_vector_t &&o)
+  hb_vector_t (hb_vector_t &&o) noexcept
   {
     allocated = o.allocated;
     length = o.length;
@@ -122,7 +122,7 @@ struct hb_vector_t
     resize (0);
   }
 
-  friend void swap (hb_vector_t& a, hb_vector_t& b)
+  friend void swap (hb_vector_t& a, hb_vector_t& b) noexcept
   {
     hb_swap (a.allocated, b.allocated);
     hb_swap (a.length, b.length);
@@ -139,7 +139,7 @@ struct hb_vector_t
 
     return *this;
   }
-  hb_vector_t& operator = (hb_vector_t &&o)
+  hb_vector_t& operator = (hb_vector_t &&o) noexcept
   {
     hb_swap (*this, o);
     return *this;
diff --git a/thirdparty/harfbuzz/src/hb-version.h b/thirdparty/harfbuzz/src/hb-version.h
index b08dd1f09f..68681874ca 100644
--- a/thirdparty/harfbuzz/src/hb-version.h
+++ b/thirdparty/harfbuzz/src/hb-version.h
@@ -47,7 +47,7 @@ HB_BEGIN_DECLS
  *
  * The minor component of the library version available at compile-time.
  */
-#define HB_VERSION_MINOR 3
+#define HB_VERSION_MINOR 4
 /**
  * HB_VERSION_MICRO:
  *
@@ -60,7 +60,7 @@ HB_BEGIN_DECLS
  *
  * A string literal containing the library version available at compile-time.
  */
-#define HB_VERSION_STRING "8.3.0"
+#define HB_VERSION_STRING "8.4.0"
 
 /**
  * HB_VERSION_ATLEAST:
diff --git a/thirdparty/harfbuzz/src/hb.hh b/thirdparty/harfbuzz/src/hb.hh
index 972608d6a3..0ceeb99f50 100644
--- a/thirdparty/harfbuzz/src/hb.hh
+++ b/thirdparty/harfbuzz/src/hb.hh
@@ -64,6 +64,7 @@
 #pragma GCC diagnostic error   "-Wbitwise-instead-of-logical"
 #pragma GCC diagnostic error   "-Wcast-align"
 #pragma GCC diagnostic error   "-Wcast-function-type"
+#pragma GCC diagnostic error   "-Wcast-function-type-strict"
 #pragma GCC diagnostic error   "-Wconstant-conversion"
 #pragma GCC diagnostic error   "-Wcomma"
 #pragma GCC diagnostic error   "-Wdelete-non-virtual-dtor"
@@ -177,6 +178,11 @@
 #define HB_EXTERN __declspec (dllexport) extern
 #endif
 
+// https://github.com/harfbuzz/harfbuzz/pull/4619
+#ifndef __STDC_FORMAT_MACROS
+#define __STDC_FORMAT_MACROS 1
+#endif
+
 #include "hb.h"
 #define HB_H_IN
 #include "hb-ot.h"
@@ -212,6 +218,12 @@
 #include <winapifamily.h>
 #endif
 
+#ifndef PRId32
+# define PRId32 "d"
+# define PRIu32 "u"
+# define PRIx32 "x"
+#endif
+
 #define HB_PASTE1(a,b) a##b
 #define HB_PASTE(a,b) HB_PASTE1(a,b)
 
diff --git a/thirdparty/libktx/lib/dfdutils/createdfd.c b/thirdparty/libktx/lib/dfdutils/createdfd.c
index 2a5af00c7e..eb64204189 100644
--- a/thirdparty/libktx/lib/dfdutils/createdfd.c
+++ b/thirdparty/libktx/lib/dfdutils/createdfd.c
@@ -94,6 +94,10 @@ static uint32_t setChannelFlags(uint32_t channel, enum VkSuffix suffix)
             channel |= KHR_DF_SAMPLE_DATATYPE_LINEAR;
         }
         break;
+    case s_S10_5:
+        channel |=
+            KHR_DF_SAMPLE_DATATYPE_SIGNED;
+        break;
     }
     return channel;
 }
@@ -109,7 +113,6 @@ static void writeSample(uint32_t *DFD, int sampleNo, int channel,
         float f;
     } lower, upper;
     uint32_t *sample = DFD + 1 + KHR_DF_WORD_SAMPLESTART + sampleNo * KHR_DF_WORD_SAMPLEWORDS;
-    if (channel == 3) channel = KHR_DF_CHANNEL_RGBSDA_ALPHA;
 
     if (channel == 3) channel = KHR_DF_CHANNEL_RGBSDA_ALPHA;
     channel = setChannelFlags(channel, suffix);
@@ -159,6 +162,10 @@ static void writeSample(uint32_t *DFD, int sampleNo, int channel,
         upper.f = 1.0f;
         lower.f = 0.0f;
         break;
+    case s_S10_5:
+        assert(bits == 16 && "Format with this suffix must be 16 bits per channel.");
+        upper.i = 32;
+        lower.i = ~upper.i + 1; // -32
     }
     sample[KHR_DF_SAMPLEWORD_SAMPLELOWER] = lower.i;
     sample[KHR_DF_SAMPLEWORD_SAMPLEUPPER] = upper.i;
@@ -230,8 +237,9 @@ uint32_t *createDFDUnpacked(int bigEndian, int numChannels, int bytes,
  * @param bits[] An array of length numChannels.
  *               Each entry is the number of bits composing the channel, in
  *               order starting at bit 0 of the packed type.
- * @param paddings[] An array of length numChannels.
- *                   Each entry is the number of padding bits after each channel.
+ * @param shiftBits[] An array of length numChannels.
+ *                    Each entry is the number of bits each channel is shifted
+ *                    and thus padded with insignificant bits.
  * @param channels[] An array of length numChannels.
  *                   Each entry enumerates the channel type: 0 = red, 1 = green,
  *                   2 = blue, 15 = alpha, in order starting at bit 0 of the
@@ -243,9 +251,9 @@ uint32_t *createDFDUnpacked(int bigEndian, int numChannels, int bytes,
  * @return A data format descriptor in malloc'd data. The caller is responsible
  *         for freeing the descriptor.
  **/
-uint32_t *createDFDPackedPadded(int bigEndian, int numChannels,
-                                int bits[], int paddings[], int channels[],
-                                enum VkSuffix suffix)
+uint32_t *createDFDPackedShifted(int bigEndian, int numChannels,
+                                 int bits[], int shiftBits[], int channels[],
+                                 enum VkSuffix suffix)
 {
     uint32_t *DFD = 0;
     if (numChannels == 6) {
@@ -291,17 +299,18 @@ uint32_t *createDFDPackedPadded(int bigEndian, int numChannels,
         int sampleCounter;
         for (channelCounter = 0; channelCounter < numChannels; ++channelCounter) {
             beChannelStart[channelCounter] = totalBits;
-            totalBits += bits[channelCounter] + paddings[channelCounter];
+            totalBits += shiftBits[channelCounter] + bits[channelCounter];
         }
         BEMask = (totalBits - 1) & 0x18;
         for (channelCounter = 0; channelCounter < numChannels; ++channelCounter) {
+            bitOffset += shiftBits[channelCounter];
             bitChannel[bitOffset ^ BEMask] = channelCounter;
             if (((bitOffset + bits[channelCounter] - 1) & ~7) != (bitOffset & ~7)) {
                 /* Continuation sample */
                 bitChannel[((bitOffset + bits[channelCounter] - 1) & ~7) ^ BEMask] = channelCounter;
                 numSamples++;
             }
-            bitOffset += bits[channelCounter] + paddings[channelCounter];
+            bitOffset += bits[channelCounter];
         }
         DFD = writeHeader(numSamples, totalBits >> 3, suffix, i_COLOR);
 
@@ -343,16 +352,17 @@ uint32_t *createDFDPackedPadded(int bigEndian, int numChannels,
         int totalBits = 0;
         int bitOffset = 0;
         for (sampleCounter = 0; sampleCounter < numChannels; ++sampleCounter) {
-            totalBits += bits[sampleCounter] + paddings[sampleCounter];
+            totalBits += shiftBits[sampleCounter] + bits[sampleCounter];
         }
 
         /* One sample per channel */
         DFD = writeHeader(numChannels, totalBits >> 3, suffix, i_COLOR);
         for (sampleCounter = 0; sampleCounter < numChannels; ++sampleCounter) {
+            bitOffset += shiftBits[sampleCounter];
             writeSample(DFD, sampleCounter, channels[sampleCounter],
                         bits[sampleCounter], bitOffset,
                         1, 1, suffix);
-            bitOffset += bits[sampleCounter] + paddings[sampleCounter];
+            bitOffset += bits[sampleCounter];
         }
     }
     return DFD;
@@ -383,12 +393,12 @@ uint32_t *createDFDPacked(int bigEndian, int numChannels,
                           int bits[], int channels[],
                           enum VkSuffix suffix) {
     assert(numChannels <= 6);
-    int paddings[] = {0, 0, 0, 0, 0, 0};
-    return createDFDPackedPadded(bigEndian, numChannels, bits, paddings, channels, suffix);
+    int shiftBits[] = {0, 0, 0, 0, 0, 0};
+    return createDFDPackedShifted(bigEndian, numChannels, bits, shiftBits, channels, suffix);
 }
 
 uint32_t *createDFD422(int bigEndian, int numSamples,
-                       int bits[], int paddings[], int channels[],
+                       int bits[], int shiftBits[], int channels[],
                        int position_xs[], int position_ys[],
                        enum VkSuffix suffix) {
     assert(!bigEndian); (void) bigEndian;
@@ -396,7 +406,7 @@ uint32_t *createDFD422(int bigEndian, int numSamples,
 
     int totalBits = 0;
     for (int i = 0; i < numSamples; ++i)
-        totalBits += bits[i] + paddings[i];
+        totalBits += shiftBits[i] + bits[i];
     assert(totalBits % 8 == 0);
 
     uint32_t BDFDSize = sizeof(uint32_t) * (KHR_DF_WORD_SAMPLESTART + numSamples * KHR_DF_WORD_SAMPLEWORDS);
@@ -428,6 +438,7 @@ uint32_t *createDFD422(int bigEndian, int numSamples,
 
     int bitOffset = 0;
     for (int i = 0; i < numSamples; ++i) {
+        bitOffset += shiftBits[i];
         KHR_DFDSETSVAL(BDFD, i, BITOFFSET, bitOffset);
         KHR_DFDSETSVAL(BDFD, i, BITLENGTH, bits[i] - 1);
         KHR_DFDSETSVAL(BDFD, i, CHANNELID, channels[i]);
@@ -438,7 +449,7 @@ uint32_t *createDFD422(int bigEndian, int numSamples,
         KHR_DFDSETSVAL(BDFD, i, SAMPLEPOSITION3, 0);
         KHR_DFDSETSVAL(BDFD, i, SAMPLELOWER, 0);
         KHR_DFDSETSVAL(BDFD, i, SAMPLEUPPER, (1u << bits[i]) - 1u);
-        bitOffset += bits[i] + paddings[i];
+        bitOffset += bits[i];
     }
 
     return DFD;
diff --git a/thirdparty/libktx/lib/dfdutils/dfd.h b/thirdparty/libktx/lib/dfdutils/dfd.h
index 7f1fb74a6b..756490fc82 100644
--- a/thirdparty/libktx/lib/dfdutils/dfd.h
+++ b/thirdparty/libktx/lib/dfdutils/dfd.h
@@ -35,7 +35,8 @@ enum VkSuffix {
     s_SINT,    /*!< Signed integer format. */
     s_SFLOAT,  /*!< Signed float format. */
     s_UFLOAT,  /*!< Unsigned float format. */
-    s_SRGB     /*!< sRGB normalized format. */
+    s_SRGB,    /*!< sRGB normalized format. */
+    s_S10_5    /*!< 2's complement fixed-point; 5 fractional bits. */
 };
 
 /** Compression scheme, in Vulkan terms. */
@@ -68,15 +69,16 @@ typedef unsigned int uint32_t;
 #endif
 
 uint32_t* vk2dfd(enum VkFormat format);
+enum VkFormat dfd2vk(uint32_t* dfd);
 
 /* Create a Data Format Descriptor for an unpacked format. */
 uint32_t *createDFDUnpacked(int bigEndian, int numChannels, int bytes,
                             int redBlueSwap, enum VkSuffix suffix);
 
 /* Create a Data Format Descriptor for a packed padded format. */
-uint32_t *createDFDPackedPadded(int bigEndian, int numChannels,
-                                int bits[], int paddings[], int channels[],
-                                enum VkSuffix suffix);
+uint32_t *createDFDPackedShifted(int bigEndian, int numChannels,
+                                 int bits[], int shiftBits[],
+                                 int channels[], enum VkSuffix suffix);
 
 /* Create a Data Format Descriptor for a packed format. */
 uint32_t *createDFDPacked(int bigEndian, int numChannels,
@@ -85,7 +87,7 @@ uint32_t *createDFDPacked(int bigEndian, int numChannels,
 
 /* Create a Data Format Descriptor for a 4:2:2 format. */
 uint32_t *createDFD422(int bigEndian, int numChannels,
-                       int bits[], int paddings[], int channels[],
+                       int bits[], int shiftBits[], int channels[],
                        int position_xs[], int position_ys[],
                        enum VkSuffix suffix);
 
@@ -111,10 +113,11 @@ enum InterpretDFDResult {
     i_SRGB_FORMAT_BIT          = 1u << 2u, /*!< sRGB transfer function. */
     i_NORMALIZED_FORMAT_BIT    = 1u << 3u, /*!< Normalized (UNORM or SNORM). */
     i_SIGNED_FORMAT_BIT        = 1u << 4u, /*!< Format is signed. */
-    i_FLOAT_FORMAT_BIT         = 1u << 5u, /*!< Format is floating point. */
-    i_COMPRESSED_FORMAT_BIT    = 1u << 6u, /*!< Format is block compressed (422). */
-    i_YUVSDA_FORMAT_BIT        = 1u << 7u, /*!< Color model is YUVSDA. */
-    i_UNSUPPORTED_ERROR_BIT    = 1u << 8u, /*!< Format not successfully interpreted. */
+    i_FIXED_FORMAT_BIT         = 1u << 5u, /*!< Format is a fixed-point representation. */
+    i_FLOAT_FORMAT_BIT         = 1u << 6u, /*!< Format is floating point. */
+    i_COMPRESSED_FORMAT_BIT    = 1u << 7u, /*!< Format is block compressed (422). */
+    i_YUVSDA_FORMAT_BIT        = 1u << 8u, /*!< Color model is YUVSDA. */
+    i_UNSUPPORTED_ERROR_BIT    = 1u << 9u, /*!< Format not successfully interpreted. */
     /** "NONTRIVIAL_ENDIANNESS" means not big-endian, not little-endian
      * (a channel has bits that are not consecutive in either order). **/
     i_UNSUPPORTED_NONTRIVIAL_ENDIANNESS     = i_UNSUPPORTED_ERROR_BIT,
@@ -198,9 +201,12 @@ getDFDComponentInfoUnpacked(const uint32_t* DFD, uint32_t* numComponents,
 /* Return the number of components described by a DFD. */
 uint32_t getDFDNumComponents(const uint32_t* DFD);
 
-/* Recreate and return the value of bytesPlane0 as it should be for the data
+/* Reconstruct and return the value of bytesPlane0 as it should be for the data
  * post-inflation from variable-rate compression.
  */
+uint32_t
+reconstructDFDBytesPlane0FromSamples(const uint32_t* DFD);
+/* Deprecated. For backward compatibility. */
 void
 recreateBytesPlane0FromSampleInfo(const uint32_t* DFD, uint32_t* bytesPlane0);
 
diff --git a/thirdparty/libktx/lib/dfdutils/dfd2vk.inl b/thirdparty/libktx/lib/dfdutils/dfd2vk.inl
index 8b4de41597..a616566c74 100644
--- a/thirdparty/libktx/lib/dfdutils/dfd2vk.inl
+++ b/thirdparty/libktx/lib/dfdutils/dfd2vk.inl
@@ -4,12 +4,20 @@
 /***************************** Do not edit.  *****************************
              Automatically generated by makedfd2vk.pl.
  *************************************************************************/
-if (KHR_DFDVAL(dfd + 1, MODEL) == KHR_DF_MODEL_RGBSDA) {
+if (KHR_DFDVAL(dfd + 1, MODEL) == KHR_DF_MODEL_RGBSDA || KHR_DFDVAL(dfd + 1, MODEL) == KHR_DF_MODEL_YUVSDA) {
   enum InterpretDFDResult r;
   InterpretedDFDChannel R = {0,0};
   InterpretedDFDChannel G = {0,0};
   InterpretedDFDChannel B = {0,0};
   InterpretedDFDChannel A = {0,0};
+  /* interpretDFD channel overloadings for YUVSDA formats. These are
+   * different from the mapping used by Vulkan. */
+  #define Y1 R
+  #define Y2 A
+  #define CB G
+  #define U G
+  #define CR B
+  #define V B
   uint32_t wordBytes;
 
   /* Special case exponent format */
@@ -44,8 +52,17 @@ if (KHR_DFDVAL(dfd + 1, MODEL) == KHR_DF_MODEL_RGBSDA) {
     else if (wordBytes == 2) { /* PACK16 */
       if (A.size == 4) {
         if (R.offset == 12) return VK_FORMAT_R4G4B4A4_UNORM_PACK16;
-        else return VK_FORMAT_B4G4R4A4_UNORM_PACK16;
-      } else if (A.size == 0) { /* Three channels */
+        else if (B.offset == 12) return VK_FORMAT_B4G4R4A4_UNORM_PACK16;
+        else if (A.offset == 12) {
+          if (R.offset == 8) return VK_FORMAT_A4R4G4B4_UNORM_PACK16;
+          else return VK_FORMAT_A4B4G4R4_UNORM_PACK16;
+        }
+      } else if (G.size == 0 && B.size == 0 && A.size == 0) { /* One channel */
+        if (R.size == 10)
+          return VK_FORMAT_R10X6_UNORM_PACK16;
+        else if (R.size ==12)
+          return VK_FORMAT_R12X4_UNORM_PACK16;
+       } else if (A.size == 0) { /* Three channels */
         if (B.offset == 0) return VK_FORMAT_R5G6B5_UNORM_PACK16;
         else return VK_FORMAT_B5G6R5_UNORM_PACK16;
       } else { /* Four channels, one-bit alpha */
@@ -54,7 +71,7 @@ if (KHR_DFDVAL(dfd + 1, MODEL) == KHR_DF_MODEL_RGBSDA) {
         if (B.offset == 10) return VK_FORMAT_A1B5G5R5_UNORM_PACK16_KHR;
         return VK_FORMAT_B5G5R5A1_UNORM_PACK16;
       }
-    } else if (wordBytes == 4) { /* PACK32 */
+    } else if (wordBytes == 4) { /* PACK32 or 2PACK16 */
       if (A.size == 8) {
         if ((r & i_SRGB_FORMAT_BIT)) return VK_FORMAT_A8B8G8R8_SRGB_PACK32;
         if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
@@ -71,133 +88,171 @@ if (KHR_DFDVAL(dfd + 1, MODEL) == KHR_DF_MODEL_RGBSDA) {
         if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_A2B10G10R10_SNORM_PACK32;
         if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_A2B10G10R10_UINT_PACK32;
         if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_A2B10G10R10_SINT_PACK32;
-      } else if (R.size == 11) return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
+      } else if (R.size == 11) {
+          return VK_FORMAT_B10G11R11_UFLOAT_PACK32;
+      } else if (R.size == 10 && G.size == 10 && B.size == 0) { 
+          return VK_FORMAT_R10X6G10X6_UNORM_2PACK16;
+      } else if (R.size == 12 && G.size == 12 && B.size == 0) { 
+          return VK_FORMAT_R12X4G12X4_UNORM_2PACK16;
+      }
+    } else if (wordBytes == 8) { /* 4PACK16 */
+      if (r & i_YUVSDA_FORMAT_BIT) {
+        /* In Vulkan G = Y, R = Cr, B = Cb. */
+        if (Y1.size == 10 && Y1.offset == 6 && Y2.size == 10 && Y2.offset == 38)
+          return  VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16;
+        if (Y1.size == 10 && Y1.offset == 22 && Y2.size == 10 && Y2.offset == 54)
+          return VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16;
+        if (Y1.size == 12 && Y1.offset == 4 && Y2.size == 12 && Y2.offset == 36)
+          return VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16;
+        if (Y1.size == 12 && Y1.offset == 20 && Y2.size == 12 && Y2.offset == 52)
+          return VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16;
+      } else {
+        if (R.size == 10)
+          return VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16;
+        else if (R.size == 12)
+          return VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16;
+      }
     }
   } else { /* Not a packed format */
-    if (wordBytes == 1) {
-      if (A.size > 8 && R.size == 0 && G.size == 0 && B.size == 0 && (r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) {
-          return VK_FORMAT_A8_UNORM_KHR;
-      }
-      if (A.size > 0) { /* 4 channels */
-        if (R.offset == 0) { /* RGBA */
-          if ((r & i_SRGB_FORMAT_BIT)) return VK_FORMAT_R8G8B8A8_SRGB;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8A8_UNORM;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8A8_SNORM;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8A8_UINT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8A8_SINT;
-        } else { /* BGRA */
-          if ((r & i_SRGB_FORMAT_BIT)) return VK_FORMAT_B8G8R8A8_SRGB;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8A8_UNORM;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8A8_SNORM;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8A8_UINT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8A8_SINT;
-        }
-      } else if (B.size > 0) { /* 3 channels */
-        if (R.offset == 0) { /* RGB */
-          if ((r & i_SRGB_FORMAT_BIT)) return VK_FORMAT_R8G8B8_SRGB;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8_UNORM;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8_SNORM;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8_UINT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8_SINT;
-        } else { /* BGR */
-          if ((r & i_SRGB_FORMAT_BIT)) return VK_FORMAT_B8G8R8_SRGB;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8_UNORM;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8_SNORM;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8_UINT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8_SINT;
+    if (r & i_YUVSDA_FORMAT_BIT) {
+      /* In Vulkan G = Y, R = Cr, B = Cb. */
+      if (Y1.size == 1 && Y1.offset == 0 && Y2.size == 1 && Y2.offset == 2)
+        return VK_FORMAT_G8B8G8R8_422_UNORM;
+      else if (Y1.size == 1 && Y1.offset == 1 && Y2.size == 1 && Y2.offset == 3)
+        return VK_FORMAT_B8G8R8G8_422_UNORM;
+      else if (Y1.size == 2 && Y1.offset == 0 && Y2.size == 2 && Y2.offset == 4)
+        return VK_FORMAT_G16B16G16R16_422_UNORM;
+      else if (Y1.size == 2 && Y1.offset == 2 && Y2.size == 2 && Y2.offset == 6)
+        return VK_FORMAT_B16G16R16G16_422_UNORM;
+      else
+        return VK_FORMAT_UNDEFINED; // Until support added.
+    } else { /* Not YUV */
+      if (wordBytes == 1) {
+        if (A.size == 1 && R.size == 0 && G.size == 0 && B.size == 0 && (r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) {
+            return VK_FORMAT_A8_UNORM_KHR;
         }
-      } else if (G.size > 0) { /* 2 channels */
+        if (A.size > 0) { /* 4 channels */
+          if (R.offset == 0) { /* RGBA */
+            if ((r & i_SRGB_FORMAT_BIT)) return VK_FORMAT_R8G8B8A8_SRGB;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8A8_UNORM;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8A8_SNORM;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8A8_UINT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8A8_SINT;
+          } else { /* BGRA */
+            if ((r & i_SRGB_FORMAT_BIT)) return VK_FORMAT_B8G8R8A8_SRGB;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8A8_UNORM;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8A8_SNORM;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8A8_UINT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8A8_SINT;
+          }
+        } else if (B.size > 0) { /* 3 channels */
+          if (R.offset == 0) { /* RGB */
+            if ((r & i_SRGB_FORMAT_BIT)) return VK_FORMAT_R8G8B8_SRGB;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8_UNORM;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8_SNORM;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8_UINT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8B8_SINT;
+          } else { /* BGR */
+            if ((r & i_SRGB_FORMAT_BIT)) return VK_FORMAT_B8G8R8_SRGB;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8_UNORM;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8_SNORM;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8_UINT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_B8G8R8_SINT;
+          }
+        } else if (G.size > 0) { /* 2 channels */
           if ((r & i_SRGB_FORMAT_BIT)) return VK_FORMAT_R8G8_SRGB;
           if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8_UNORM;
           if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8_SNORM;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8_UINT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8G8_SINT;
-      } else { /* 1 channel */
+        } else { /* 1 channel */
           if ((r & i_SRGB_FORMAT_BIT)) return VK_FORMAT_R8_SRGB;
           if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8_UNORM;
           if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8_SNORM;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8_UINT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R8_SINT;
-      }
-    } else if (wordBytes == 2) {
-      if (A.size > 0) { /* 4 channels */
-        if (R.offset == 0) { /* RGBA */
-          if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R16G16B16A16_SFLOAT;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16A16_UNORM;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16A16_SNORM;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16A16_UINT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16A16_SINT;
-        } else { /* BGRA */
         }
-      } else if (B.size > 0) { /* 3 channels */
-        if (R.offset == 0) { /* RGB */
-          if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R16G16B16_SFLOAT;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16_UNORM;
-          if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16_SNORM;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16_UINT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16_SINT;
-        } else { /* BGR */
-        }
-      } else if (G.size > 0) { /* 2 channels */
+      } else if (wordBytes == 2) {
+        if ((r & i_FIXED_FORMAT_BIT) && R.size == 2 && G.size == 2)  return  VK_FORMAT_R16G16_S10_5_NV;
+        if (A.size > 0) { /* 4 channels */
+          if (R.offset == 0) { /* RGBA */
+            if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R16G16B16A16_SFLOAT;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16A16_UNORM;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16A16_SNORM;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16A16_UINT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16A16_SINT;
+          } else { /* BGRA */
+          }
+        } else if (B.size > 0) { /* 3 channels */
+          if (R.offset == 0) { /* RGB */
+            if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R16G16B16_SFLOAT;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16_UNORM;
+            if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16_SNORM;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16_UINT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16B16_SINT;
+          } else { /* BGR */
+          }
+        } else if (G.size > 0) { /* 2 channels */
           if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R16G16_SFLOAT;
           if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16_UNORM;
           if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16_SNORM;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16_UINT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16G16_SINT;
-      } else { /* 1 channel */
+        } else { /* 1 channel */
           if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R16_SFLOAT;
           if ((r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16_UNORM;
           if ((r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16_SNORM;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16_UINT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R16_SINT;
-      }
-    } else if (wordBytes == 4) {
-      if (A.size > 0) { /* 4 channels */
-        if (R.offset == 0) { /* RGBA */
-          if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R32G32B32A32_SFLOAT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32G32B32A32_UINT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32G32B32A32_SINT;
-        } else { /* BGRA */
-        }
-      } else if (B.size > 0) { /* 3 channels */
-        if (R.offset == 0) { /* RGB */
-          if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R32G32B32_SFLOAT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32G32B32_UINT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32G32B32_SINT;
-        } else { /* BGR */
         }
-      } else if (G.size > 0) { /* 2 channels */
+      } else if (wordBytes == 4) {
+        if (A.size > 0) { /* 4 channels */
+          if (R.offset == 0) { /* RGBA */
+            if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R32G32B32A32_SFLOAT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32G32B32A32_UINT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32G32B32A32_SINT;
+          } else { /* BGRA */
+          }
+        } else if (B.size > 0) { /* 3 channels */
+          if (R.offset == 0) { /* RGB */
+            if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R32G32B32_SFLOAT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32G32B32_UINT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32G32B32_SINT;
+          } else { /* BGR */
+          }
+        } else if (G.size > 0) { /* 2 channels */
           if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R32G32_SFLOAT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32G32_UINT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32G32_SINT;
-      } else { /* 1 channel */
+        } else { /* 1 channel */
           if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R32_SFLOAT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32_UINT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R32_SINT;
-      }
-    } else if (wordBytes == 8) {
-      if (A.size > 0) { /* 4 channels */
-        if (R.offset == 0) { /* RGBA */
-          if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R64G64B64A64_SFLOAT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64G64B64A64_UINT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64G64B64A64_SINT;
-        } else { /* BGRA */
-        }
-      } else if (B.size > 0) { /* 3 channels */
-        if (R.offset == 0) { /* RGB */
-          if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R64G64B64_SFLOAT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64G64B64_UINT;
-          if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64G64B64_SINT;
-        } else { /* BGR */
         }
-      } else if (G.size > 0) { /* 2 channels */
+      } else if (wordBytes == 8) {
+        if (A.size > 0) { /* 4 channels */
+          if (R.offset == 0) { /* RGBA */
+            if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R64G64B64A64_SFLOAT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64G64B64A64_UINT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64G64B64A64_SINT;
+          } else { /* BGRA */
+          }
+        } else if (B.size > 0) { /* 3 channels */
+          if (R.offset == 0) { /* RGB */
+            if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R64G64B64_SFLOAT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64G64B64_UINT;
+            if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64G64B64_SINT;
+          } else { /* BGR */
+          }
+        } else if (G.size > 0) { /* 2 channels */
           if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R64G64_SFLOAT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64G64_UINT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64G64_SINT;
-      } else { /* 1 channel */
+        } else { /* 1 channel */
           if ((r & i_FLOAT_FORMAT_BIT)) return VK_FORMAT_R64_SFLOAT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && !(r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64_UINT;
           if (!(r & i_NORMALIZED_FORMAT_BIT) && (r & i_SIGNED_FORMAT_BIT)) return VK_FORMAT_R64_SINT;
+        }
       }
     }
   }
diff --git a/thirdparty/libktx/lib/dfdutils/interpretdfd.c b/thirdparty/libktx/lib/dfdutils/interpretdfd.c
index 6d25f33a1b..b58a79fb43 100644
--- a/thirdparty/libktx/lib/dfdutils/interpretdfd.c
+++ b/thirdparty/libktx/lib/dfdutils/interpretdfd.c
@@ -29,18 +29,37 @@ static uint32_t bit_ceil(uint32_t x) {
  * @~English
  * @brief Interpret a Data Format Descriptor for a simple format.
  *
- * @param DFD Pointer to a Data Format Descriptor to interpret,
-              described as 32-bit words in native endianness.
-              Note that this is the whole descriptor, not just
-              the basic descriptor block.
- * @param R Information about the decoded red channel or the depth channel, if any.
- * @param G Information about the decoded green channel or the stencil channel, if any.
- * @param B Information about the decoded blue channel, if any.
- * @param A Information about the decoded alpha channel, if any.
- * @param wordBytes Byte size of the channels (unpacked) or total size (packed).
+ * Handles "simple" cases that can be translated to things a GPU can access.
+ * For simplicity, it ignores the compressed formats, which are generally a
+ * single sample (and I believe are all defined to be little-endian in their
+ * in-memory layout, even if some documentation confuses this).  Focuses on
+ * the layout and ignores sRGB except for reporting if that is the transfer
+ * function by way of a bit in the returned value.
+ *
+ * @param[in] DFD Pointer to a Data Format Descriptor to interpret,
+ *            described as 32-bit words in native endianness.
+ *            Note that this is the whole descriptor, not just
+ *            the basic descriptor block.
+ * @param R[in,out] Pointer to struct to receive information about the decoded
+ *                  red channel, the Y channel, if YUV, or the depth channel,
+ *                  if any.
+ * @param G[in,out] Pointer to struct to receive information about the decoded
+ *                  green channel, the U (Cb) channel, if YUV, or the stencil
+ *                  channel, if any.
+ * @param B[in,out] Pointer to struct to receive information about the decoded
+ *                  blue channel, if any or the V (Cr) channel, if YUV.
+ * @param A[in,out] Pointer to struct to receive information about the decoded
+ *                  alpha channel, if any or the second Y channel, if YUV and
+ *                  any.
+ * @param wordBytes[in,out] Pointer to a uint32_t to receive the byte size of
+ *                          the channels (unpacked) or total size (packed).
  *
  * @return An enumerant describing the decoded value,
  *         or an error code in case of failure.
+ *
+ * The mapping of YUV channels to the parameter names used here is based on
+ * the channel ids in @c khr_df.h and is different from the convention used
+ * in format names in the Vulkan specification where G == Y, R = Cr and B = Cb.
  **/
 enum InterpretDFDResult interpretDFD(const uint32_t *DFD,
                                      InterpretedDFDChannel *R,
@@ -49,14 +68,6 @@ enum InterpretDFDResult interpretDFD(const uint32_t *DFD,
                                      InterpretedDFDChannel *A,
                                      uint32_t *wordBytes)
 {
-    /* We specifically handle "simple" cases that can be translated */
-    /* to things a GPU can access. For simplicity, we also ignore */
-    /* the compressed formats, which are generally a single sample */
-    /* (and I believe are all defined to be little-endian in their */
-    /* in-memory layout, even if some documentation confuses this). */
-    /* We also just worry about layout and ignore sRGB, since that's */
-    /* trivial to extract anyway. */
-
     /* DFD points to the whole descriptor, not the basic descriptor block. */
     /* Make everything else relative to the basic descriptor block. */
     const uint32_t *BDFDB = DFD+1;
@@ -78,7 +89,7 @@ enum InterpretDFDResult interpretDFD(const uint32_t *DFD,
 
     /* First rule out the multiple planes case (trivially) */
     /* - that is, we check that only bytesPlane0 is non-zero. */
-    /* This means we don't handle YUV even if the API could. */
+    /* This means we don't handle multi-plane YUV, even if the API could. */
     /* (We rely on KHR_DF_WORD_BYTESPLANE0..3 being the same and */
     /* KHR_DF_WORD_BYTESPLANE4..7 being the same as a short cut.) */
     if ((BDFDB[KHR_DF_WORD_BYTESPLANE0] & ~KHR_DF_MASK_BYTESPLANE0)
@@ -104,6 +115,8 @@ enum InterpretDFDResult interpretDFD(const uint32_t *DFD,
     bool hasSigned = false;
     bool hasFloat = false;
     bool hasNormalized = false;
+    bool hasFixed = false;
+    khr_df_model_e model = KHR_DFDVAL(BDFDB, MODEL);
 
     // Note: We're ignoring 9995, which is weird and worth special-casing
     // rather than trying to generalise to all float formats.
@@ -116,13 +129,23 @@ enum InterpretDFDResult interpretDFD(const uint32_t *DFD,
         // (i.e. set to the maximum bit value, and check min value) on
         // the assumption that we're looking at a format which *came* from
         // an API we can support.
-        const bool isNormalized = isFloat ?
-                *(float*) (void*) &BDFDB[KHR_DF_WORD_SAMPLESTART +
+        bool isFixed;
+        bool isNormalized;
+        if (isFloat) {
+            isNormalized = *(float*) (void*) &BDFDB[KHR_DF_WORD_SAMPLESTART +
                     KHR_DF_WORD_SAMPLEWORDS * i +
-                    KHR_DF_SAMPLEWORD_SAMPLEUPPER] != 1.0f :
-                KHR_DFDSVAL(BDFDB, i, SAMPLEUPPER) != 1U;
-
+                    KHR_DF_SAMPLEWORD_SAMPLEUPPER] != 1.0f;
+            isFixed = false;
+        } else {
+            uint32_t sampleUpper = KHR_DFDSVAL(BDFDB, i, SAMPLEUPPER);
+            uint32_t maxVal = 1U << KHR_DFDSVAL(BDFDB, i, BITLENGTH);
+            if (!isSigned) maxVal <<= 1;
+            maxVal--;
+            isFixed = 1U < sampleUpper && sampleUpper < maxVal;
+            isNormalized = !isFixed && sampleUpper != 1U;
+        }
         hasSigned |= isSigned;
+        hasFixed |= isFixed;
         hasFloat |= isFloat;
         // By our definition the normalizedness of a single bit channel (like in RGBA 5:5:5:1)
         // is ambiguous. Ignore these during normalized checks.
@@ -132,9 +155,10 @@ enum InterpretDFDResult interpretDFD(const uint32_t *DFD,
     result |= hasSigned ? i_SIGNED_FORMAT_BIT : 0;
     result |= hasFloat ? i_FLOAT_FORMAT_BIT : 0;
     result |= hasNormalized ? i_NORMALIZED_FORMAT_BIT : 0;
+    result |= hasFixed ? i_FIXED_FORMAT_BIT : 0;
 
     // Checks based on color model
-    if (KHR_DFDVAL(BDFDB, MODEL) == KHR_DF_MODEL_YUVSDA) {
+    if (model == KHR_DF_MODEL_YUVSDA) {
         result |= i_NORMALIZED_FORMAT_BIT;
         result |= i_COMPRESSED_FORMAT_BIT;
         result |= i_YUVSDA_FORMAT_BIT;
@@ -165,7 +189,7 @@ enum InterpretDFDResult interpretDFD(const uint32_t *DFD,
         *wordBytes = ((result & i_PACKED_FORMAT_BIT) ? 4 : 1) * bit_ceil(largestSampleSize) / 8;
 
     } else if (KHR_DFDVAL(BDFDB, MODEL) == KHR_DF_MODEL_RGBSDA) {
-        /* We only pay attention to sRGB. */
+        /* Check if transfer is sRGB. */
         if (KHR_DFDVAL(BDFDB, TRANSFER) == KHR_DF_TRANSFER_SRGB) result |= i_SRGB_FORMAT_BIT;
 
         /* We only support samples at coordinate 0,0,0,0. */
@@ -175,7 +199,11 @@ enum InterpretDFDResult interpretDFD(const uint32_t *DFD,
             if (KHR_DFDSVAL(BDFDB, sampleCounter, SAMPLEPOSITION_ALL))
                 return i_UNSUPPORTED_MULTIPLE_SAMPLE_LOCATIONS;
         }
+    }
 
+    if (model == KHR_DF_MODEL_RGBSDA || model == KHR_DF_MODEL_YUVSDA) {
+        /* The values of the DEPTH and STENCIL tokens are the same for */
+        /* RGBSDA and YUVSDA. */
         /* For Depth/Stencil formats mixed channels are allowed */
         for (uint32_t sampleCounter = 0; sampleCounter < numSamples; ++sampleCounter) {
             switch (KHR_DFDSVAL(BDFDB, sampleCounter, CHANNELID)) {
@@ -206,6 +234,9 @@ enum InterpretDFDResult interpretDFD(const uint32_t *DFD,
             }
         }
 
+        /* This all relies on the channel id values for RGB being equal to */
+        /* those for YUV. */
+
         /* Remember: the canonical ordering of samples is to start with */
         /* the lowest bit of the channel/location which touches bit 0 of */
         /* the data, when the latter is concatenated in little-endian order, */
@@ -288,8 +319,20 @@ enum InterpretDFDResult interpretDFD(const uint32_t *DFD,
                     currentByteOffset = sampleByteOffset;
                     currentBitLength = sampleBitLength;
                     if (sampleChannelPtr->size) {
-                        /* Uh-oh, we've seen this channel before. */
-                        return i_UNSUPPORTED_NONTRIVIAL_ENDIANNESS;
+                        if (model == KHR_DF_MODEL_YUVSDA && sampleChannel == KHR_DF_CHANNEL_YUVSDA_Y) {
+                            if (sampleChannelPtr == R) {
+                                /* We've got another Y channel. Record details in A. */
+                                if (A->size == 0) {
+                                    sampleChannelPtr = A;
+                                } else {
+                                    /* Uh-oh, we've already got a second Y or an alpha channel. */
+                                    return i_UNSUPPORTED_CHANNEL_TYPES;
+                                }
+                            }
+                        } else {
+                          /* Uh-oh, we've seen this channel before. */
+                          return i_UNSUPPORTED_NONTRIVIAL_ENDIANNESS;
+                        }
                     }
                     /* For now, record the bit offset in little-endian terms, */
                     /* because we may not know to reverse it yet. */
@@ -378,8 +421,20 @@ enum InterpretDFDResult interpretDFD(const uint32_t *DFD,
                     currentByteOffset = sampleByteOffset;
                     currentByteLength = sampleByteLength;
                     if (sampleChannelPtr->size) {
-                        /* Uh-oh, we've seen this channel before. */
-                        return i_UNSUPPORTED_NONTRIVIAL_ENDIANNESS;
+                        if (model == KHR_DF_MODEL_YUVSDA && sampleChannel == KHR_DF_CHANNEL_YUVSDA_Y) {
+                            if (sampleChannelPtr == R) {
+                                /* We've got another Y channel. Record details in A. */
+                                if (A->size == 0) {
+                                    sampleChannelPtr = A;
+                                } else {
+                                    /* Uh-oh, we've already got a second Y or an alpha channel. */
+                                    return i_UNSUPPORTED_CHANNEL_TYPES;
+                                }
+                            }
+                        } else {
+                          /* Uh-oh, we've seen this channel before. */
+                          return i_UNSUPPORTED_NONTRIVIAL_ENDIANNESS;
+                        }
                     }
                     /* For now, record the byte offset in little-endian terms, */
                     /* because we may not know to reverse it yet. */
diff --git a/thirdparty/libktx/lib/dfdutils/queries.c b/thirdparty/libktx/lib/dfdutils/queries.c
index 19488f9e33..66d77cf6ad 100644
--- a/thirdparty/libktx/lib/dfdutils/queries.c
+++ b/thirdparty/libktx/lib/dfdutils/queries.c
@@ -41,15 +41,15 @@ getDFDComponentInfoUnpacked(const uint32_t* DFD, uint32_t* numComponents,
 {
     const uint32_t *BDFDB = DFD+1;
     uint32_t numSamples = KHR_DFDSAMPLECOUNT(BDFDB);
-    uint32_t sampleCounter;
+    uint32_t sampleNumber;
     uint32_t currentChannel = ~0U; /* Don't start matched. */
 
     /* This is specifically for unpacked formats which means the size of */
     /* each component is the same. */
     *numComponents = 0;
-    for (sampleCounter = 0; sampleCounter < numSamples; ++sampleCounter) {
-        uint32_t sampleByteLength = (KHR_DFDSVAL(BDFDB, sampleCounter, BITLENGTH) + 1) >> 3U;
-        uint32_t sampleChannel = KHR_DFDSVAL(BDFDB, sampleCounter, CHANNELID);
+    for (sampleNumber = 0; sampleNumber < numSamples; ++sampleNumber) {
+        uint32_t sampleByteLength = (KHR_DFDSVAL(BDFDB, sampleNumber, BITLENGTH) + 1) >> 3U;
+        uint32_t sampleChannel = KHR_DFDSVAL(BDFDB, sampleNumber, CHANNELID);
 
         if (sampleChannel == currentChannel) {
             /* Continuation of the same channel. */
@@ -85,10 +85,10 @@ uint32_t getDFDNumComponents(const uint32_t* DFD)
     uint32_t currentChannel = ~0U; /* Don't start matched. */
     uint32_t numComponents = 0;
     uint32_t numSamples = KHR_DFDSAMPLECOUNT(BDFDB);
-    uint32_t sampleCounter;
+    uint32_t sampleNumber;
 
-    for (sampleCounter = 0; sampleCounter < numSamples; ++sampleCounter) {
-        uint32_t sampleChannel = KHR_DFDSVAL(BDFDB, sampleCounter, CHANNELID);
+    for (sampleNumber = 0; sampleNumber < numSamples; ++sampleNumber) {
+        uint32_t sampleChannel = KHR_DFDSVAL(BDFDB, sampleNumber, CHANNELID);
         if (sampleChannel != currentChannel) {
             numComponents++;
             currentChannel = sampleChannel;
@@ -97,50 +97,83 @@ uint32_t getDFDNumComponents(const uint32_t* DFD)
     return numComponents;
 }
 
+
 /**
  * @~English
- * @brief Recreate the value of bytesPlane0 from sample info.
+ * @brief Reconstruct the value of bytesPlane0 from sample info.
  *
- * This can be use to recreate the value of bytesPlane0 for data that
- * has been variable-rate compressed so has bytesPlane0 = 0.  For DFDs
- * that are valid for KTX files. Little-endian data only and no multi-plane
- * formats.
+ * Reconstruct the value for data that has been variable-rate compressed so
+ * has bytesPlane0 = 0.  For DFDs that are valid for KTX files. Little-endian
+ * data only and no multi-plane formats.
  *
  * @param DFD Pointer to a Data Format Descriptor for which,
  *            described as 32-bit words in native endianness.
  *            Note that this is the whole descriptor, not just
  *            the basic descriptor block.
- * @param bytesPlane0  pointer to a 32-bit word in which the recreated
- *                    value of bytesPlane0 will be written.
  */
-void
-recreateBytesPlane0FromSampleInfo(const uint32_t* DFD, uint32_t* bytesPlane0)
+uint32_t
+reconstructDFDBytesPlane0FromSamples(const uint32_t* DFD)
 {
     const uint32_t *BDFDB = DFD+1;
     uint32_t numSamples = KHR_DFDSAMPLECOUNT(BDFDB);
-    uint32_t sampleCounter;
+    uint32_t sampleNumber;
 
     uint32_t bitsPlane0 = 0;
-    uint32_t* bitOffsets = malloc(sizeof(uint32_t) * numSamples);
-    memset(bitOffsets, -1, sizeof(uint32_t) * numSamples);
-    for (sampleCounter = 0; sampleCounter < numSamples; ++sampleCounter) {
-        uint32_t sampleBitOffset = KHR_DFDSVAL(BDFDB, sampleCounter, BITOFFSET);
-        /* The sample bitLength field stores the bit length - 1. */
-        uint32_t sampleBitLength = KHR_DFDSVAL(BDFDB, sampleCounter, BITLENGTH) + 1;
-        uint32_t i;
-        for (i = 0; i < numSamples; i++) {
-            if (sampleBitOffset == bitOffsets[i]) {
-                // This sample is being repeated as in e.g. RGB9E5.
-                break;
+    int32_t largestOffset = 0;
+    uint32_t sampleNumberWithLargestOffset = 0;
+
+    // Special case these depth{,-stencil} formats. The unused bits are
+    // in the MSBs so have no visibility in the DFD therefore the max offset
+    // algorithm below returns a value that is too small.
+    if (KHR_DFDSVAL(BDFDB, 0, CHANNELID) == KHR_DF_CHANNEL_COMMON_DEPTH) {
+        if (numSamples == 1) {
+            if (KHR_DFDSVAL(BDFDB, 0, BITLENGTH) + 1 == 24) {
+                // X8_D24_UNORM_PACK32,
+                return 4;
+            }
+        } else if (numSamples == 2) {
+            if (KHR_DFDSVAL(BDFDB, 0, BITLENGTH) + 1 == 16) {
+                // D16_UNORM_S8_UINT
+                return 4;
+            }
+            if (KHR_DFDSVAL(BDFDB, 0, BITLENGTH) + 1 == 32
+                && KHR_DFDSVAL(BDFDB, 1, CHANNELID) == KHR_DF_CHANNEL_COMMON_STENCIL) {
+                // D32_SFLOAT_S8_UINT
+                return 8;
             }
         }
-        if (i == numSamples) {
-            // Previously unseen bitOffset. Bump size.
-            bitsPlane0 += sampleBitLength;
-            bitOffsets[sampleCounter] = sampleBitOffset;
+    }
+    for (sampleNumber = 0; sampleNumber < numSamples; ++sampleNumber) {
+        int32_t sampleBitOffset = KHR_DFDSVAL(BDFDB, sampleNumber, BITOFFSET);
+        if (sampleBitOffset > largestOffset) {
+            largestOffset = sampleBitOffset;
+            sampleNumberWithLargestOffset = sampleNumber;
         }
     }
-    free(bitOffsets);
-    *bytesPlane0 = bitsPlane0 >> 3U;
+
+    /* The sample bitLength field stores the bit length - 1. */
+    uint32_t sampleBitLength = KHR_DFDSVAL(BDFDB, sampleNumberWithLargestOffset, BITLENGTH) + 1;
+    bitsPlane0 = largestOffset + sampleBitLength;
+    return bitsPlane0 >> 3U;
 }
 
+/**
+ * @~English
+ * @brief Reconstruct the value of bytesPlane0 from sample info.
+ *
+ * @see reconstructDFDBytesPlane0FromSamples for details.
+ * @deprecated For backward comparibility only. Use
+ *             reconstructDFDBytesPlane0FromSamples.
+ *
+ * @param DFD Pointer to a Data Format Descriptor for which,
+ *            described as 32-bit words in native endianness.
+ *            Note that this is the whole descriptor, not just
+ *            the basic descriptor block.
+ * @param bytesPlane0  pointer to a 32-bit word in which the recreated
+ *                    value of bytesPlane0 will be written.
+ */
+void
+recreateBytesPlane0FromSampleInfo(const uint32_t* DFD, uint32_t* bytesPlane0)
+{
+    *bytesPlane0 = reconstructDFDBytesPlane0FromSamples(DFD);
+}
diff --git a/thirdparty/libktx/lib/dfdutils/vk2dfd.inl b/thirdparty/libktx/lib/dfdutils/vk2dfd.inl
index 25c7a2c238..3398441e8c 100644
--- a/thirdparty/libktx/lib/dfdutils/vk2dfd.inl
+++ b/thirdparty/libktx/lib/dfdutils/vk2dfd.inl
@@ -277,68 +277,68 @@ case VK_FORMAT_ASTC_12x10_SRGB_BLOCK: return createDFDCompressed(c_ASTC, 12, 10,
 case VK_FORMAT_ASTC_12x12_UNORM_BLOCK: return createDFDCompressed(c_ASTC, 12, 12, 1, s_UNORM);
 case VK_FORMAT_ASTC_12x12_SRGB_BLOCK: return createDFDCompressed(c_ASTC, 12, 12, 1, s_SRGB);
 case VK_FORMAT_G8B8G8R8_422_UNORM: {
-    int channels[] = {0, 1, 0, 2}; int bits[] = {8, 8, 8, 8}; int paddings[] = {0, 0, 0, 0};
+    int channels[] = {0, 1, 0, 2}; int bits[] = {8, 8, 8, 8}; int shiftBits[] = {0, 0, 0, 0};
     int position_xs[] = {64, 64, 192, 64}; int position_ys[] = {128, 128, 128, 128};
-    return createDFD422(0, 4, bits, paddings, channels, position_xs, position_ys, s_UNORM);
+    return createDFD422(0, 4, bits, shiftBits, channels, position_xs, position_ys, s_UNORM);
 }
 case VK_FORMAT_B8G8R8G8_422_UNORM: {
-    int channels[] = {1, 0, 2, 0}; int bits[] = {8, 8, 8, 8}; int paddings[] = {0, 0, 0, 0};
+    int channels[] = {1, 0, 2, 0}; int bits[] = {8, 8, 8, 8}; int shiftBits[] = {0, 0, 0, 0};
     int position_xs[] = {64, 64, 64, 192}; int position_ys[] = {128, 128, 128, 128};
-    return createDFD422(0, 4, bits, paddings, channels, position_xs, position_ys, s_UNORM);
+    return createDFD422(0, 4, bits, shiftBits, channels, position_xs, position_ys, s_UNORM);
 }
 case VK_FORMAT_R10X6_UNORM_PACK16: {
-    int channels[] = {0}; int bits[] = {10}; int paddings[] = {6};
-    return createDFDPackedPadded(0, 1, bits, paddings, channels, s_UNORM);
+    int channels[] = {0}; int bits[] = {10}; int shiftBits[] = {6};
+    return createDFDPackedShifted(0, 1, bits, shiftBits, channels, s_UNORM);
 }
 case VK_FORMAT_R10X6G10X6_UNORM_2PACK16: {
-    int channels[] = {0, 1}; int bits[] = {10, 10}; int paddings[] = {6, 6};
-    return createDFDPackedPadded(0, 2, bits, paddings, channels, s_UNORM);
+    int channels[] = {0, 1}; int bits[] = {10, 10}; int shiftBits[] = {6, 6};
+    return createDFDPackedShifted(0, 2, bits, shiftBits, channels, s_UNORM);
 }
 case VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16: {
-    int channels[] = {0, 1, 2, 3}; int bits[] = {10, 10, 10, 10}; int paddings[] = {6, 6, 6, 6};
-    return createDFDPackedPadded(0, 4, bits, paddings, channels, s_UNORM);
+    int channels[] = {0, 1, 2, 3}; int bits[] = {10, 10, 10, 10}; int shiftBits[] = {6, 6, 6, 6};
+    return createDFDPackedShifted(0, 4, bits, shiftBits, channels, s_UNORM);
 }
 case VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16: {
-    int channels[] = {0, 1, 0, 2}; int bits[] = {10, 10, 10, 10}; int paddings[] = {6, 6, 6, 6};
+    int channels[] = {0, 1, 0, 2}; int bits[] = {10, 10, 10, 10}; int shiftBits[] = {6, 6, 6, 6};
     int position_xs[] = {64, 64, 192, 64}; int position_ys[] = {128, 128, 128, 128};
-    return createDFD422(0, 4, bits, paddings, channels, position_xs, position_ys, s_UNORM);
+    return createDFD422(0, 4, bits, shiftBits, channels, position_xs, position_ys, s_UNORM);
 }
 case VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16: {
-    int channels[] = {1, 0, 2, 0}; int bits[] = {10, 10, 10, 10}; int paddings[] = {6, 6, 6, 6};
+    int channels[] = {1, 0, 2, 0}; int bits[] = {10, 10, 10, 10}; int shiftBits[] = {6, 6, 6, 6};
     int position_xs[] = {64, 64, 64, 192}; int position_ys[] = {128, 128, 128, 128};
-    return createDFD422(0, 4, bits, paddings, channels, position_xs, position_ys, s_UNORM);
+    return createDFD422(0, 4, bits, shiftBits, channels, position_xs, position_ys, s_UNORM);
 }
 case VK_FORMAT_R12X4_UNORM_PACK16: {
-    int channels[] = {0}; int bits[] = {12}; int paddings[] = {4};
-    return createDFDPackedPadded(0, 1, bits, paddings, channels, s_UNORM);
+    int channels[] = {0}; int bits[] = {12}; int shiftBits[] = {4};
+    return createDFDPackedShifted(0, 1, bits, shiftBits, channels, s_UNORM);
 }
 case VK_FORMAT_R12X4G12X4_UNORM_2PACK16: {
-    int channels[] = {0, 1}; int bits[] = {12, 12}; int paddings[] = {4, 4};
-    return createDFDPackedPadded(0, 2, bits, paddings, channels, s_UNORM);
+    int channels[] = {0, 1}; int bits[] = {12, 12}; int shiftBits[] = {4, 4};
+    return createDFDPackedShifted(0, 2, bits, shiftBits, channels, s_UNORM);
 }
 case VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16: {
-    int channels[] = {0, 1, 2, 3}; int bits[] = {12, 12, 12, 12}; int paddings[] = {4, 4, 4, 4};
-    return createDFDPackedPadded(0, 4, bits, paddings, channels, s_UNORM);
+    int channels[] = {0, 1, 2, 3}; int bits[] = {12, 12, 12, 12}; int shiftBits[] = {4, 4, 4, 4};
+    return createDFDPackedShifted(0, 4, bits, shiftBits, channels, s_UNORM);
 }
 case VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16: {
-    int channels[] = {0, 1, 0, 2}; int bits[] = {12, 12, 12, 12}; int paddings[] = {4, 4, 4, 4};
+    int channels[] = {0, 1, 0, 2}; int bits[] = {12, 12, 12, 12}; int shiftBits[] = {4, 4, 4, 4};
     int position_xs[] = {64, 64, 192, 64}; int position_ys[] = {128, 128, 128, 128};
-    return createDFD422(0, 4, bits, paddings, channels, position_xs, position_ys, s_UNORM);
+    return createDFD422(0, 4, bits, shiftBits, channels, position_xs, position_ys, s_UNORM);
 }
 case VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16: {
-    int channels[] = {1, 0, 2, 0}; int bits[] = {12, 12, 12, 12}; int paddings[] = {4, 4, 4, 4};
+    int channels[] = {1, 0, 2, 0}; int bits[] = {12, 12, 12, 12}; int shiftBits[] = {4, 4, 4, 4};
     int position_xs[] = {64, 64, 64, 192}; int position_ys[] = {128, 128, 128, 128};
-    return createDFD422(0, 4, bits, paddings, channels, position_xs, position_ys, s_UNORM);
+    return createDFD422(0, 4, bits, shiftBits, channels, position_xs, position_ys, s_UNORM);
 }
 case VK_FORMAT_G16B16G16R16_422_UNORM: {
-    int channels[] = {0, 1, 0, 2}; int bits[] = {16, 16, 16, 16}; int paddings[] = {0, 0, 0, 0};
+    int channels[] = {0, 1, 0, 2}; int bits[] = {16, 16, 16, 16}; int shiftBits[] = {0, 0, 0, 0};
     int position_xs[] = {64, 64, 192, 64}; int position_ys[] = {128, 128, 128, 128};
-    return createDFD422(0, 4, bits, paddings, channels, position_xs, position_ys, s_UNORM);
+    return createDFD422(0, 4, bits, shiftBits, channels, position_xs, position_ys, s_UNORM);
 }
 case VK_FORMAT_B16G16R16G16_422_UNORM: {
-    int channels[] = {1, 0, 2, 0}; int bits[] = {16, 16, 16, 16}; int paddings[] = {0, 0, 0, 0};
+    int channels[] = {1, 0, 2, 0}; int bits[] = {16, 16, 16, 16}; int shiftBits[] = {0, 0, 0, 0};
     int position_xs[] = {64, 64, 64, 192}; int position_ys[] = {128, 128, 128, 128};
-    return createDFD422(0, 4, bits, paddings, channels, position_xs, position_ys, s_UNORM);
+    return createDFD422(0, 4, bits, shiftBits, channels, position_xs, position_ys, s_UNORM);
 }
 case VK_FORMAT_A4R4G4B4_UNORM_PACK16: {
     int channels[] = {2,1,0,3}; int bits[] = {4,4,4,4};
@@ -402,6 +402,7 @@ case VK_FORMAT_ASTC_6x6x6_UNORM_BLOCK_EXT: return createDFDCompressed(c_ASTC, 6,
 case VK_FORMAT_ASTC_6x6x6_SRGB_BLOCK_EXT: return createDFDCompressed(c_ASTC, 6, 6, 6, s_SRGB);
 case VK_FORMAT_ASTC_6x6x6_SFLOAT_BLOCK_EXT: return createDFDCompressed(c_ASTC, 6, 6, 6, s_SFLOAT);
 #endif
+case VK_FORMAT_R16G16_S10_5_NV: return createDFDUnpacked(0, 2, 2, 0, s_S10_5);
 case VK_FORMAT_A1B5G5R5_UNORM_PACK16_KHR: {
     int channels[] = {0,1,2,3}; int bits[] = {5,5,5,1};
     return createDFDPacked(0, 4, bits, channels, s_UNORM);
diff --git a/thirdparty/libktx/lib/gl_format.h b/thirdparty/libktx/lib/gl_format.h
index 3986bc430c..44abd2c6d4 100644
--- a/thirdparty/libktx/lib/gl_format.h
+++ b/thirdparty/libktx/lib/gl_format.h
@@ -1,4 +1,4 @@
-/*
+/*
 ================================================================================================
 
 Description	:	OpenGL formats/types and properties.
@@ -70,7 +70,6 @@ static inline GLenum glGetFormatFromInternalFormat( const GLenum internalFormat
 static inline GLenum glGetTypeFromInternalFormat( const GLenum internalFormat );
 static inline void glGetFormatSize( const GLenum internalFormat, GlFormatSize * pFormatSize );
 static inline unsigned int glGetTypeSizeFromType( const GLenum type );
-static inline GLenum glGetInternalFormatFromVkFormat ( VkFormat format );
 
 MODIFICATIONS for use in libktx
 ===============================
@@ -79,7 +78,6 @@ MODIFICATIONS for use in libktx
 2019.3.09 #if 0 around GL type declarations.            〃
 2019.5.30 Use common ktxFormatSize to return results.         〃
 2019.5.30 Return blockSizeInBits 0 for default case of glGetFormatSize. 〃
-2019.5.30 Added glGetInternalFormatFromVkFormat.            〃
 
 ================================================================================================
 */
@@ -101,6 +99,7 @@ MODIFICATIONS for use in libktx
 #endif // __cplusplus
 #endif
 
+
 /*
 ===========================================================================
 Avoid warnings or even errors when using strict C99. "Redefinition of
@@ -2436,219 +2435,4 @@ static inline void glGetFormatSize( const GLenum internalFormat, ktxFormatSize *
 	}
 }
 
-static inline GLint glGetInternalFormatFromVkFormat( VkFormat vkFormat )
-{
-    switch ( vkFormat )
-    {
-        //
-        // 8 bits per component
-        //
-        case VK_FORMAT_R8_UNORM:                   return GL_R8;                // 1-component, 8-bit unsigned normalized
-        case VK_FORMAT_R8G8_UNORM:                 return GL_RG8;               // 2-component, 8-bit unsigned normalized
-        case VK_FORMAT_R8G8B8_UNORM:               return GL_RGB8;              // 3-component, 8-bit unsigned normalized
-        case VK_FORMAT_R8G8B8A8_UNORM:             return GL_RGBA8;             // 4-component, 8-bit unsigned normalized
-
-        case VK_FORMAT_R8_SNORM:                   return GL_R8_SNORM;          // 1-component, 8-bit signed normalized
-        case VK_FORMAT_R8G8_SNORM:                 return GL_RG8_SNORM;         // 2-component, 8-bit signed normalized
-        case VK_FORMAT_R8G8B8_SNORM:               return GL_RGB8_SNORM;        // 3-component, 8-bit signed normalized
-        case VK_FORMAT_R8G8B8A8_SNORM:             return GL_RGBA8_SNORM;       // 4-component, 8-bit signed normalized
-
-        case VK_FORMAT_R8_UINT:                    return GL_R8UI;              // 1-component, 8-bit unsigned integer
-        case VK_FORMAT_R8G8_UINT:                  return GL_RG8UI;             // 2-component, 8-bit unsigned integer
-        case VK_FORMAT_R8G8B8_UINT:                return GL_RGB8UI;            // 3-component, 8-bit unsigned integer
-        case VK_FORMAT_R8G8B8A8_UINT:              return GL_RGBA8UI;           // 4-component, 8-bit unsigned integer
-
-        case VK_FORMAT_R8_SINT:                    return GL_R8I;               // 1-component, 8-bit signed integer
-        case VK_FORMAT_R8G8_SINT:                  return GL_RG8I;              // 2-component, 8-bit signed integer
-        case VK_FORMAT_R8G8B8_SINT:                return GL_RGB8I;             // 3-component, 8-bit signed integer
-        case VK_FORMAT_R8G8B8A8_SINT:              return GL_RGBA8I;            // 4-component, 8-bit signed integer
-
-        case VK_FORMAT_R8_SRGB:                    return GL_SR8;               // 1-component, 8-bit sRGB
-        case VK_FORMAT_R8G8_SRGB:                  return GL_SRG8;              // 2-component, 8-bit sRGB
-        case VK_FORMAT_R8G8B8_SRGB:                return GL_SRGB8;             // 3-component, 8-bit sRGB
-        case VK_FORMAT_R8G8B8A8_SRGB:              return GL_SRGB8_ALPHA8;      // 4-component, 8-bit sRGB
-
-        //
-        // 16 bits per component
-        //
-        case VK_FORMAT_R16_UNORM:                  return GL_R16;               // 1-component, 16-bit unsigned normalized
-        case VK_FORMAT_R16G16_UNORM:               return GL_RG16;              // 2-component, 16-bit unsigned normalized
-        case VK_FORMAT_R16G16B16_UNORM:            return GL_RGB16;             // 3-component, 16-bit unsigned normalized
-        case VK_FORMAT_R16G16B16A16_UNORM:         return GL_RGBA16;            // 4-component, 16-bit unsigned normalized
-
-        case VK_FORMAT_R16_SNORM:                  return GL_R16_SNORM;         // 1-component, 16-bit signed normalized
-        case VK_FORMAT_R16G16_SNORM:               return GL_RG16_SNORM;        // 2-component, 16-bit signed normalized
-        case VK_FORMAT_R16G16B16_SNORM:            return GL_RGB16_SNORM;       // 3-component, 16-bit signed normalized
-        case VK_FORMAT_R16G16B16A16_SNORM:         return GL_RGBA16_SNORM;      // 4-component, 16-bit signed normalized
-
-        case VK_FORMAT_R16_UINT:                   return GL_R16UI;             // 1-component, 16-bit unsigned integer
-        case VK_FORMAT_R16G16_UINT:                return GL_RG16UI;            // 2-component, 16-bit unsigned integer
-        case VK_FORMAT_R16G16B16_UINT:             return GL_RGB16UI;           // 3-component, 16-bit unsigned integer
-        case VK_FORMAT_R16G16B16A16_UINT:          return GL_RGBA16UI;          // 4-component, 16-bit unsigned integer
-
-        case VK_FORMAT_R16_SINT:                   return GL_R16I;              // 1-component, 16-bit signed integer
-        case VK_FORMAT_R16G16_SINT:                return GL_RG16I;             // 2-component, 16-bit signed integer
-        case VK_FORMAT_R16G16B16_SINT:             return GL_RGB16I;            // 3-component, 16-bit signed integer
-        case VK_FORMAT_R16G16B16A16_SINT:          return GL_RGBA16I;           // 4-component, 16-bit signed integer
-
-        case VK_FORMAT_R16_SFLOAT:                 return GL_R16F;              // 1-component, 16-bit floating-point
-        case VK_FORMAT_R16G16_SFLOAT:              return GL_RG16F;             // 2-component, 16-bit floating-point
-        case VK_FORMAT_R16G16B16_SFLOAT:           return GL_RGB16F;            // 3-component, 16-bit floating-point
-        case VK_FORMAT_R16G16B16A16_SFLOAT:        return GL_RGBA16F;           // 4-component, 16-bit floating-point
-
-        //
-        // 32 bits per component
-        //
-        case VK_FORMAT_R32_UINT:                   return GL_R32UI;             // 1-component, 32-bit unsigned integer
-        case VK_FORMAT_R32G32_UINT:                return GL_RG32UI;            // 2-component, 32-bit unsigned integer
-        case VK_FORMAT_R32G32B32_UINT:             return GL_RGB32UI;           // 3-component, 32-bit unsigned integer
-        case VK_FORMAT_R32G32B32A32_UINT:          return GL_RGBA32UI;          // 4-component, 32-bit unsigned integer
-
-        case VK_FORMAT_R32_SINT:                   return GL_R32I;              // 1-component, 32-bit signed integer
-        case VK_FORMAT_R32G32_SINT:                return GL_RG32I;             // 2-component, 32-bit signed integer
-        case VK_FORMAT_R32G32B32_SINT:             return GL_RGB32I;            // 3-component, 32-bit signed integer
-        case VK_FORMAT_R32G32B32A32_SINT:          return GL_RGBA32I;           // 4-component, 32-bit signed integer
-
-        case VK_FORMAT_R32_SFLOAT:                 return GL_R32F;              // 1-component, 32-bit floating-point
-        case VK_FORMAT_R32G32_SFLOAT:              return GL_RG32F;             // 2-component, 32-bit floating-point
-        case VK_FORMAT_R32G32B32_SFLOAT:           return GL_RGB32F;            // 3-component, 32-bit floating-point
-        case VK_FORMAT_R32G32B32A32_SFLOAT:        return GL_RGBA32F;           // 4-component, 32-bit floating-point
-
-        //
-        // Packed
-        //
-        case VK_FORMAT_R5G5B5A1_UNORM_PACK16:      return GL_RGB5;              // 3-component 5:5:5,       unsigned normalized
-        case VK_FORMAT_R5G6B5_UNORM_PACK16:        return GL_RGB565;            // 3-component 5:6:5,       unsigned normalized
-        case VK_FORMAT_R4G4B4A4_UNORM_PACK16:      return GL_RGBA4;             // 4-component 4:4:4:4,     unsigned normalized
-        case VK_FORMAT_A1R5G5B5_UNORM_PACK16:      return GL_RGB5_A1;           // 4-component 5:5:5:1,     unsigned normalized
-        case VK_FORMAT_A2R10G10B10_UNORM_PACK32:   return GL_RGB10_A2;          // 4-component 10:10:10:2,  unsigned normalized
-        case VK_FORMAT_A2R10G10B10_UINT_PACK32:    return GL_RGB10_A2UI;        // 4-component 10:10:10:2,  unsigned integer
-        case VK_FORMAT_B10G11R11_UFLOAT_PACK32:    return GL_R11F_G11F_B10F;    // 3-component 11:11:10,    floating-point
-        case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:     return GL_RGB9_E5;           // 3-component/exp 9:9:9/5, floating-point
-
-        //
-        // S3TC/DXT/BC
-        //
-
-        case VK_FORMAT_BC1_RGB_UNORM_BLOCK:        return GL_COMPRESSED_RGB_S3TC_DXT1_EXT;                  // line through 3D space, 4x4 blocks, unsigned normalized
-        case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:       return GL_COMPRESSED_RGBA_S3TC_DXT1_EXT;                 // line through 3D space plus 1-bit alpha, 4x4 blocks, unsigned normalized
-        case VK_FORMAT_BC2_UNORM_BLOCK:            return GL_COMPRESSED_RGBA_S3TC_DXT3_EXT;                 // line through 3D space plus line through 1D space, 4x4 blocks, unsigned normalized
-        case VK_FORMAT_BC3_UNORM_BLOCK:            return GL_COMPRESSED_RGBA_S3TC_DXT5_EXT;                 // line through 3D space plus 4-bit alpha, 4x4 blocks, unsigned normalized
-
-        case VK_FORMAT_BC1_RGB_SRGB_BLOCK:         return GL_COMPRESSED_SRGB_S3TC_DXT1_EXT;                 // line through 3D space, 4x4 blocks, sRGB
-        case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:        return GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT;           // line through 3D space plus 1-bit alpha, 4x4 blocks, sRGB
-        case VK_FORMAT_BC2_SRGB_BLOCK:             return GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT;           // line through 3D space plus line through 1D space, 4x4 blocks, sRGB
-        case VK_FORMAT_BC3_SRGB_BLOCK:             return GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT;           // line through 3D space plus 4-bit alpha, 4x4 blocks, sRGB
-
-        case VK_FORMAT_BC4_UNORM_BLOCK:            return GL_COMPRESSED_RED_RGTC1;                          // line through 1D space, 4x4 blocks, unsigned normalized
-        case VK_FORMAT_BC5_UNORM_BLOCK:            return GL_COMPRESSED_RG_RGTC2;                           // two lines through 1D space, 4x4 blocks, unsigned normalized
-        case VK_FORMAT_BC4_SNORM_BLOCK:            return GL_COMPRESSED_SIGNED_RED_RGTC1;                   // line through 1D space, 4x4 blocks, signed normalized
-        case VK_FORMAT_BC5_SNORM_BLOCK:            return GL_COMPRESSED_SIGNED_RG_RGTC2;                    // two lines through 1D space, 4x4 blocks, signed normalized
-
-        case VK_FORMAT_BC6H_UFLOAT_BLOCK:          return GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT;            // 3-component, 4x4 blocks, unsigned floating-point
-        case VK_FORMAT_BC6H_SFLOAT_BLOCK:          return GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT;              // 3-component, 4x4 blocks, signed floating-point
-        case VK_FORMAT_BC7_UNORM_BLOCK:            return GL_COMPRESSED_RGBA_BPTC_UNORM;                    // 4-component, 4x4 blocks, unsigned normalized
-        case VK_FORMAT_BC7_SRGB_BLOCK:             return GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM;              // 4-component, 4x4 blocks, sRGB
-
-        //
-        // ETC
-        //
-        case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:    return GL_COMPRESSED_RGB8_ETC2;                          // 3-component ETC2, 4x4 blocks, unsigned normalized
-        case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:  return GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2;      // 4-component ETC2 with 1-bit alpha, 4x4 blocks, unsigned normalized
-        case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:  return GL_COMPRESSED_RGBA8_ETC2_EAC;                     // 4-component ETC2, 4x4 blocks, unsigned normalized
-
-        case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:     return GL_COMPRESSED_SRGB8_ETC2;                         // 3-component ETC2, 4x4 blocks, sRGB
-        case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:   return GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2;     // 4-component ETC2 with 1-bit alpha, 4x4 blocks, sRGB
-        case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:   return GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC;              // 4-component ETC2, 4x4 blocks, sRGB
-
-        case VK_FORMAT_EAC_R11_UNORM_BLOCK:        return GL_COMPRESSED_R11_EAC;                            // 1-component ETC, 4x4 blocks, unsigned normalized
-        case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:     return GL_COMPRESSED_RG11_EAC;                           // 2-component ETC, 4x4 blocks, unsigned normalized
-        case VK_FORMAT_EAC_R11_SNORM_BLOCK:        return GL_COMPRESSED_SIGNED_R11_EAC;                     // 1-component ETC, 4x4 blocks, signed normalized
-        case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:     return GL_COMPRESSED_SIGNED_RG11_EAC;                    // 2-component ETC, 4x4 blocks, signed normalized
-
-        //
-        // PVRTC
-        //
-        case VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG:    return GL_COMPRESSED_RGBA_PVRTC_2BPPV1_IMG;           // 3- or 4-component PVRTC, 16x8 blocks, unsigned normalized
-        case VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG:    return GL_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG;           // 3- or 4-component PVRTC,  8x8 blocks, unsigned normalized
-        case VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG:    return GL_COMPRESSED_RGBA_PVRTC_2BPPV2_IMG;           // 3- or 4-component PVRTC, 16x8 blocks, unsigned normalized
-        case VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG:    return GL_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG;           // 3- or 4-component PVRTC,  4x4 blocks, unsigned normalized
-
-        case VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG:     return GL_COMPRESSED_SRGB_ALPHA_PVRTC_2BPPV1_EXT;     // 4-component PVRTC, 16x8 blocks, sRGB
-        case VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG:     return GL_COMPRESSED_SRGB_ALPHA_PVRTC_4BPPV1_EXT;     // 4-component PVRTC,  8x8 blocks, sRGB
-        case VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG:     return GL_COMPRESSED_SRGB_ALPHA_PVRTC_2BPPV2_IMG;     // 4-component PVRTC,  8x4 blocks, sRGB
-        case VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG:     return GL_COMPRESSED_SRGB_ALPHA_PVRTC_4BPPV2_IMG;     // 4-component PVRTC,  4x4 blocks, sRGB
-
-        //
-        // ASTC
-        //
-        case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:           return GL_COMPRESSED_RGBA_ASTC_4x4_KHR;                // 4-component ASTC, 4x4 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:           return GL_COMPRESSED_RGBA_ASTC_5x4_KHR;                // 4-component ASTC, 5x4 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:           return GL_COMPRESSED_RGBA_ASTC_5x5_KHR;                // 4-component ASTC, 5x5 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:           return GL_COMPRESSED_RGBA_ASTC_6x5_KHR;                // 4-component ASTC, 6x5 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:           return GL_COMPRESSED_RGBA_ASTC_6x6_KHR;                // 4-component ASTC, 6x6 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:           return GL_COMPRESSED_RGBA_ASTC_8x5_KHR;                // 4-component ASTC, 8x5 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:           return GL_COMPRESSED_RGBA_ASTC_8x6_KHR;                // 4-component ASTC, 8x6 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:           return GL_COMPRESSED_RGBA_ASTC_8x8_KHR;                // 4-component ASTC, 8x8 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:          return GL_COMPRESSED_RGBA_ASTC_10x5_KHR;               // 4-component ASTC, 10x5 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:          return GL_COMPRESSED_RGBA_ASTC_10x6_KHR;               // 4-component ASTC, 10x6 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:          return GL_COMPRESSED_RGBA_ASTC_10x8_KHR;               // 4-component ASTC, 10x8 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:         return GL_COMPRESSED_RGBA_ASTC_10x10_KHR;              // 4-component ASTC, 10x10 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:         return GL_COMPRESSED_RGBA_ASTC_12x10_KHR;              // 4-component ASTC, 12x10 blocks, unsigned normalized
-        case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:         return GL_COMPRESSED_RGBA_ASTC_12x12_KHR;              // 4-component ASTC, 12x12 blocks, unsigned normalized
-
-        case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:            return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR;        // 4-component ASTC, 4x4 blocks, sRGB
-        case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:            return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR;        // 4-component ASTC, 5x4 blocks, sRGB
-        case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:            return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR;        // 4-component ASTC, 5x5 blocks, sRGB
-        case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:            return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR;        // 4-component ASTC, 6x5 blocks, sRGB
-        case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:            return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR;        // 4-component ASTC, 6x6 blocks, sRGB
-        case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:            return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR;        // 4-component ASTC, 8x5 blocks, sRGB
-        case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:            return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR;        // 4-component ASTC, 8x6 blocks, sRGB
-        case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:            return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR;        // 4-component ASTC, 8x8 blocks, sRGB
-        case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:           return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR;       // 4-component ASTC, 10x5 blocks, sRGB
-        case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:           return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR;       // 4-component ASTC, 10x6 blocks, sRGB
-        case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:           return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR;       // 4-component ASTC, 10x8 blocks, sRGB
-        case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:          return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR;      // 4-component ASTC, 10x10 blocks, sRGB
-        case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:          return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR;      // 4-component ASTC, 12x10 blocks, sRGB
-        case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:          return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR;      // 4-component ASTC, 12x12 blocks, sRGB
-
-        // XXX FIXME Update once Vulkan ASTC HDR & 3D extensions are released.
-#if 0
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_RGBA_ASTC_3x3x3_OES;               // 4-component ASTC, 3x3x3 blocks, unsigned normalized
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_RGBA_ASTC_4x3x3_OES;               // 4-component ASTC, 4x3x3 blocks, unsigned normalized
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_RGBA_ASTC_4x4x3_OES;               // 4-component ASTC, 4x4x3 blocks, unsigned normalized
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_RGBA_ASTC_4x4x4_OES;               // 4-component ASTC, 4x4x4 blocks, unsigned normalized
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_RGBA_ASTC_5x4x4_OES;               // 4-component ASTC, 5x4x4 blocks, unsigned normalized
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_RGBA_ASTC_5x5x4_OES;               // 4-component ASTC, 5x5x4 blocks, unsigned normalized
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_RGBA_ASTC_5x5x5_OES;               // 4-component ASTC, 5x5x5 blocks, unsigned normalized
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_RGBA_ASTC_6x5x5_OES;               // 4-component ASTC, 6x5x5 blocks, unsigned normalized
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_RGBA_ASTC_6x6x5_OES;               // 4-component ASTC, 6x6x5 blocks, unsigned normalized
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_RGBA_ASTC_6x6x6_OES;               // 4-component ASTC, 6x6x6 blocks, unsigned normalized
-
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_3x3x3_OES;       // 4-component ASTC, 3x3x3 blocks, sRGB
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x3x3_OES;       // 4-component ASTC, 4x3x3 blocks, sRGB
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x3_OES;       // 4-component ASTC, 4x4x3 blocks, sRGB
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4x4_OES;       // 4-component ASTC, 4x4x4 blocks, sRGB
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4x4_OES;       // 4-component ASTC, 5x4x4 blocks, sRGB
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x4_OES;       // 4-component ASTC, 5x5x4 blocks, sRGB
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5x5_OES;       // 4-component ASTC, 5x5x5 blocks, sRGB
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5x5_OES;       // 4-component ASTC, 6x5x5 blocks, sRGB
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x5_OES;       // 4-component ASTC, 6x6x5 blocks, sRGB
-        case VK_FORMAT_UNDEFINED:                      return GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6x6_OES;       // 4-component ASTC, 6x6x6 blocks, sRGB
-#endif
-
-        //
-        // Depth/stencil
-        //
-        case VK_FORMAT_D16_UNORM:                      return GL_DEPTH_COMPONENT16;
-        case VK_FORMAT_X8_D24_UNORM_PACK32:            return GL_DEPTH_COMPONENT24;
-        case VK_FORMAT_D32_SFLOAT:                     return GL_DEPTH_COMPONENT32F;
-        case VK_FORMAT_S8_UINT:                        return GL_STENCIL_INDEX8;
-        case VK_FORMAT_D24_UNORM_S8_UINT:              return GL_DEPTH24_STENCIL8;
-        case VK_FORMAT_D32_SFLOAT_S8_UINT:             return GL_DEPTH32F_STENCIL8;
-
-        default:                                       return GL_INVALID_VALUE;
-    }
-}
-
 #endif // !GL_FORMAT_H
diff --git a/thirdparty/libktx/lib/texture2.c b/thirdparty/libktx/lib/texture2.c
index 173e5026d9..014612fb57 100644
--- a/thirdparty/libktx/lib/texture2.c
+++ b/thirdparty/libktx/lib/texture2.c
@@ -20,6 +20,7 @@
 #define _CRT_SECURE_NO_WARNINGS
 #endif
 
+#include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
@@ -34,12 +35,13 @@
 #include "memstream.h"
 #include "texture2.h"
 #include "unused.h"
-#include "vk_format.h"
 
 // FIXME: Test this #define and put it in a header somewhere.
 //#define IS_BIG_ENDIAN (1 == *(unsigned char *)&(const int){0x01000000ul})
 #define IS_BIG_ENDIAN 0
 
+extern uint32_t vkFormatTypeSize(VkFormat format);
+
 struct ktxTexture_vtbl ktxTexture2_vtbl;
 struct ktxTexture_vtblInt ktxTexture2_vtblInt;
 
@@ -217,18 +219,18 @@ ktx_uint32_t e5b9g9r9_ufloat_comparator[e5b9g9r9_bdbwordcount] = {
 #endif
 
 /**
-* @private
-* @~English
-* @brief Initialize a ktxFormatSize object from the info in a DFD.
-*
-* This is used instead of referring to the DFD directly so code dealing
-* with format info can be common to KTX 1 & 2.
-*
-* @param[in] This   pointer the ktxTexture2 whose DFD to use.
-* @param[in] fi       pointer to the ktxFormatSize object to initialize.
-*
-* @return    KTX_TRUE on success, otherwise KTX_FALSE.
-*/
+ * @private
+ * @~English
+ * @brief Initialize a ktxFormatSize object from the info in a DFD.
+ *
+ * This is used instead of referring to the DFD directly so code dealing
+ * with format info can be common to KTX 1 & 2.
+ *
+ * @param[in] This   pointer the ktxFormatSize to initialize.
+ * @param[in] pDFD   pointer to the DFD whose data to use.
+ *
+ * @return    KTX_TRUE on success, otherwise KTX_FALSE.
+ */
 bool
 ktxFormatSize_initFromDfd(ktxFormatSize* This, ktx_uint32_t* pDfd)
 {
@@ -308,9 +310,10 @@ ktxFormatSize_initFromDfd(ktxFormatSize* This, ktx_uint32_t* pDfd)
         // the following reasons. (1) in v2 files levelIndex is always used to
         // calculate data size and, of course, for the level offsets. (2) Finer
         // grain access to supercompressed data than levels is not possible.
-        uint32_t blockByteLength;
-        recreateBytesPlane0FromSampleInfo(pDfd, &blockByteLength);
-        This->blockSizeInBits = blockByteLength * 8;
+        //
+        // The value set here is applied to the DFD after the data has been
+        // inflated during loading.
+        This->blockSizeInBits = reconstructDFDBytesPlane0FromSamples(pDfd) * 8;
     }
     return true;
 }
@@ -427,34 +430,11 @@ ktxTexture2_construct(ktxTexture2* This, ktxTextureCreateInfo* createInfo,
 
     This->vkFormat = createInfo->vkFormat;
 
-    // Ideally we'd set all these things in ktxFormatSize_initFromDfd
-    // but This->_protected is not allocated until ktxTexture_construct;
-    if (This->isCompressed && (formatSize.flags & KTX_FORMAT_SIZE_YUVSDA_BIT) == 0) {
-        This->_protected->_typeSize = 1;
-    } else if (formatSize.flags & (KTX_FORMAT_SIZE_DEPTH_BIT | KTX_FORMAT_SIZE_STENCIL_BIT)) {
-        switch (createInfo->vkFormat) {
-        case VK_FORMAT_S8_UINT:
-            This->_protected->_typeSize = 1;
-            break;
-        case VK_FORMAT_D16_UNORM: // [[fallthrough]];
-        case VK_FORMAT_D16_UNORM_S8_UINT:
-            This->_protected->_typeSize = 2;
-            break;
-        case VK_FORMAT_X8_D24_UNORM_PACK32: // [[fallthrough]];
-        case VK_FORMAT_D24_UNORM_S8_UINT: // [[fallthrough]];
-        case VK_FORMAT_D32_SFLOAT: // [[fallthrough]];
-        case VK_FORMAT_D32_SFLOAT_S8_UINT:
-            This->_protected->_typeSize = 4;
-            break;
-        }
-    } else if (formatSize.flags & KTX_FORMAT_SIZE_PACKED_BIT) {
-        This->_protected->_typeSize = formatSize.blockSizeInBits / 8;
-    } else {
-        // Unpacked and uncompressed
-        uint32_t numComponents;
-        getDFDComponentInfoUnpacked(This->pDfd, &numComponents,
-                                    &This->_protected->_typeSize);
-    }
+    // The typeSize cannot be reconstructed just from the DFD as the BDFD
+    // does not capture the packing expressed by the [m]PACK[n] layout
+    // information in the VkFormat, so we calculate the typeSize directly
+    // from the vkFormat
+    This->_protected->_typeSize = vkFormatTypeSize(createInfo->vkFormat);
 
     This->supercompressionScheme = KTX_SS_NONE;
 
diff --git a/thirdparty/libktx/lib/vk_format.h b/thirdparty/libktx/lib/vk_format.h
index 18adf33b59..ab8a1ecbb6 100644
--- a/thirdparty/libktx/lib/vk_format.h
+++ b/thirdparty/libktx/lib/vk_format.h
@@ -401,6 +401,7 @@ static inline VkFormat vkGetFormatFromOpenGLFormat( const GLenum format, const G
 	return VK_FORMAT_UNDEFINED;
 }
 
+#if defined(NEED_VK_GET_FORMAT_FROM_OPENGL_TYPE)
 static inline VkFormat vkGetFormatFromOpenGLType( const GLenum type, const GLuint numComponents, const GLboolean normalized )
 {
 	switch ( type )
@@ -566,6 +567,7 @@ static inline VkFormat vkGetFormatFromOpenGLType( const GLenum type, const GLuin
 
 	return VK_FORMAT_UNDEFINED;
 }
+#endif
 
 static inline VkFormat vkGetFormatFromOpenGLInternalFormat( const GLenum internalFormat )
 {
@@ -823,6 +825,7 @@ static inline VkFormat vkGetFormatFromOpenGLInternalFormat( const GLenum interna
 	}
 }
 
+#if defined(NEED_VK_GET_FORMAT_SIZE)
 static inline void vkGetFormatSize( const VkFormat format, ktxFormatSize * pFormatSize )
 {
 	pFormatSize->minBlocksX = pFormatSize->minBlocksY = 1;
@@ -1384,5 +1387,6 @@ static inline void vkGetFormatSize( const VkFormat format, ktxFormatSize * pForm
 			break;
 	}
 }
+#endif
 
 #endif // !VK_FORMAT_H
diff --git a/thirdparty/libktx/lib/vkformat_check.c b/thirdparty/libktx/lib/vkformat_check.c
index e987b3d76a..5b86e3c8fe 100644
--- a/thirdparty/libktx/lib/vkformat_check.c
+++ b/thirdparty/libktx/lib/vkformat_check.c
@@ -30,13 +30,8 @@ isProhibitedFormat(VkFormat format)
       case VK_FORMAT_R8G8B8A8_SSCALED:
       case VK_FORMAT_B8G8R8A8_USCALED:
       case VK_FORMAT_B8G8R8A8_SSCALED:
-      case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
-      case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
       case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
       case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
-      case VK_FORMAT_A8B8G8R8_UINT_PACK32:
-      case VK_FORMAT_A8B8G8R8_SINT_PACK32:
-      case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
       case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
       case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
       case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
diff --git a/thirdparty/libktx/lib/vkformat_typesize.c b/thirdparty/libktx/lib/vkformat_typesize.c
new file mode 100644
index 0000000000..92fb468045
--- /dev/null
+++ b/thirdparty/libktx/lib/vkformat_typesize.c
@@ -0,0 +1,584 @@
+
+/***************************** Do not edit.  *****************************
+ Automatically generated from vulkan_core.h version 267 by mkvkformatfiles.
+ *************************************************************************/
+
+/*
+** Copyright 2015-2023 The Khronos Group Inc.
+**
+** SPDX-License-Identifier: Apache-2.0
+*/
+
+
+#include <stdint.h>
+
+#include "vkformat_enum.h"
+
+uint32_t
+vkFormatTypeSize(VkFormat format)
+{
+    switch (format) {
+      case VK_FORMAT_UNDEFINED:
+        return 1;
+      case VK_FORMAT_R4G4_UNORM_PACK8:
+        return 1;
+      case VK_FORMAT_R4G4B4A4_UNORM_PACK16:
+        return 2;
+      case VK_FORMAT_B4G4R4A4_UNORM_PACK16:
+        return 2;
+      case VK_FORMAT_R5G6B5_UNORM_PACK16:
+        return 2;
+      case VK_FORMAT_B5G6R5_UNORM_PACK16:
+        return 2;
+      case VK_FORMAT_R5G5B5A1_UNORM_PACK16:
+        return 2;
+      case VK_FORMAT_B5G5R5A1_UNORM_PACK16:
+        return 2;
+      case VK_FORMAT_A1R5G5B5_UNORM_PACK16:
+        return 2;
+      case VK_FORMAT_R8_UNORM:
+        return 1;
+      case VK_FORMAT_R8_SNORM:
+        return 1;
+      case VK_FORMAT_R8_USCALED:
+        return 1;
+      case VK_FORMAT_R8_SSCALED:
+        return 1;
+      case VK_FORMAT_R8_UINT:
+        return 1;
+      case VK_FORMAT_R8_SINT:
+        return 1;
+      case VK_FORMAT_R8_SRGB:
+        return 1;
+      case VK_FORMAT_R8G8_UNORM:
+        return 1;
+      case VK_FORMAT_R8G8_SNORM:
+        return 1;
+      case VK_FORMAT_R8G8_USCALED:
+        return 1;
+      case VK_FORMAT_R8G8_SSCALED:
+        return 1;
+      case VK_FORMAT_R8G8_UINT:
+        return 1;
+      case VK_FORMAT_R8G8_SINT:
+        return 1;
+      case VK_FORMAT_R8G8_SRGB:
+        return 1;
+      case VK_FORMAT_R8G8B8_UNORM:
+        return 1;
+      case VK_FORMAT_R8G8B8_SNORM:
+        return 1;
+      case VK_FORMAT_R8G8B8_USCALED:
+        return 1;
+      case VK_FORMAT_R8G8B8_SSCALED:
+        return 1;
+      case VK_FORMAT_R8G8B8_UINT:
+        return 1;
+      case VK_FORMAT_R8G8B8_SINT:
+        return 1;
+      case VK_FORMAT_R8G8B8_SRGB:
+        return 1;
+      case VK_FORMAT_B8G8R8_UNORM:
+        return 1;
+      case VK_FORMAT_B8G8R8_SNORM:
+        return 1;
+      case VK_FORMAT_B8G8R8_USCALED:
+        return 1;
+      case VK_FORMAT_B8G8R8_SSCALED:
+        return 1;
+      case VK_FORMAT_B8G8R8_UINT:
+        return 1;
+      case VK_FORMAT_B8G8R8_SINT:
+        return 1;
+      case VK_FORMAT_B8G8R8_SRGB:
+        return 1;
+      case VK_FORMAT_R8G8B8A8_UNORM:
+        return 1;
+      case VK_FORMAT_R8G8B8A8_SNORM:
+        return 1;
+      case VK_FORMAT_R8G8B8A8_USCALED:
+        return 1;
+      case VK_FORMAT_R8G8B8A8_SSCALED:
+        return 1;
+      case VK_FORMAT_R8G8B8A8_UINT:
+        return 1;
+      case VK_FORMAT_R8G8B8A8_SINT:
+        return 1;
+      case VK_FORMAT_R8G8B8A8_SRGB:
+        return 1;
+      case VK_FORMAT_B8G8R8A8_UNORM:
+        return 1;
+      case VK_FORMAT_B8G8R8A8_SNORM:
+        return 1;
+      case VK_FORMAT_B8G8R8A8_USCALED:
+        return 1;
+      case VK_FORMAT_B8G8R8A8_SSCALED:
+        return 1;
+      case VK_FORMAT_B8G8R8A8_UINT:
+        return 1;
+      case VK_FORMAT_B8G8R8A8_SINT:
+        return 1;
+      case VK_FORMAT_B8G8R8A8_SRGB:
+        return 1;
+      case VK_FORMAT_A8B8G8R8_UNORM_PACK32:
+        return 4;
+      case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
+        return 4;
+      case VK_FORMAT_A8B8G8R8_USCALED_PACK32:
+        return 4;
+      case VK_FORMAT_A8B8G8R8_SSCALED_PACK32:
+        return 4;
+      case VK_FORMAT_A8B8G8R8_UINT_PACK32:
+        return 4;
+      case VK_FORMAT_A8B8G8R8_SINT_PACK32:
+        return 4;
+      case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+        return 4;
+      case VK_FORMAT_A2R10G10B10_UNORM_PACK32:
+        return 4;
+      case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
+        return 4;
+      case VK_FORMAT_A2R10G10B10_USCALED_PACK32:
+        return 4;
+      case VK_FORMAT_A2R10G10B10_SSCALED_PACK32:
+        return 4;
+      case VK_FORMAT_A2R10G10B10_UINT_PACK32:
+        return 4;
+      case VK_FORMAT_A2R10G10B10_SINT_PACK32:
+        return 4;
+      case VK_FORMAT_A2B10G10R10_UNORM_PACK32:
+        return 4;
+      case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
+        return 4;
+      case VK_FORMAT_A2B10G10R10_USCALED_PACK32:
+        return 4;
+      case VK_FORMAT_A2B10G10R10_SSCALED_PACK32:
+        return 4;
+      case VK_FORMAT_A2B10G10R10_UINT_PACK32:
+        return 4;
+      case VK_FORMAT_A2B10G10R10_SINT_PACK32:
+        return 4;
+      case VK_FORMAT_R16_UNORM:
+        return 2;
+      case VK_FORMAT_R16_SNORM:
+        return 2;
+      case VK_FORMAT_R16_USCALED:
+        return 2;
+      case VK_FORMAT_R16_SSCALED:
+        return 2;
+      case VK_FORMAT_R16_UINT:
+        return 2;
+      case VK_FORMAT_R16_SINT:
+        return 2;
+      case VK_FORMAT_R16_SFLOAT:
+        return 2;
+      case VK_FORMAT_R16G16_UNORM:
+        return 2;
+      case VK_FORMAT_R16G16_SNORM:
+        return 2;
+      case VK_FORMAT_R16G16_USCALED:
+        return 2;
+      case VK_FORMAT_R16G16_SSCALED:
+        return 2;
+      case VK_FORMAT_R16G16_UINT:
+        return 2;
+      case VK_FORMAT_R16G16_SINT:
+        return 2;
+      case VK_FORMAT_R16G16_SFLOAT:
+        return 2;
+      case VK_FORMAT_R16G16B16_UNORM:
+        return 2;
+      case VK_FORMAT_R16G16B16_SNORM:
+        return 2;
+      case VK_FORMAT_R16G16B16_USCALED:
+        return 2;
+      case VK_FORMAT_R16G16B16_SSCALED:
+        return 2;
+      case VK_FORMAT_R16G16B16_UINT:
+        return 2;
+      case VK_FORMAT_R16G16B16_SINT:
+        return 2;
+      case VK_FORMAT_R16G16B16_SFLOAT:
+        return 2;
+      case VK_FORMAT_R16G16B16A16_UNORM:
+        return 2;
+      case VK_FORMAT_R16G16B16A16_SNORM:
+        return 2;
+      case VK_FORMAT_R16G16B16A16_USCALED:
+        return 2;
+      case VK_FORMAT_R16G16B16A16_SSCALED:
+        return 2;
+      case VK_FORMAT_R16G16B16A16_UINT:
+        return 2;
+      case VK_FORMAT_R16G16B16A16_SINT:
+        return 2;
+      case VK_FORMAT_R16G16B16A16_SFLOAT:
+        return 2;
+      case VK_FORMAT_R32_UINT:
+        return 4;
+      case VK_FORMAT_R32_SINT:
+        return 4;
+      case VK_FORMAT_R32_SFLOAT:
+        return 4;
+      case VK_FORMAT_R32G32_UINT:
+        return 4;
+      case VK_FORMAT_R32G32_SINT:
+        return 4;
+      case VK_FORMAT_R32G32_SFLOAT:
+        return 4;
+      case VK_FORMAT_R32G32B32_UINT:
+        return 4;
+      case VK_FORMAT_R32G32B32_SINT:
+        return 4;
+      case VK_FORMAT_R32G32B32_SFLOAT:
+        return 4;
+      case VK_FORMAT_R32G32B32A32_UINT:
+        return 4;
+      case VK_FORMAT_R32G32B32A32_SINT:
+        return 4;
+      case VK_FORMAT_R32G32B32A32_SFLOAT:
+        return 4;
+      case VK_FORMAT_R64_UINT:
+        return 8;
+      case VK_FORMAT_R64_SINT:
+        return 8;
+      case VK_FORMAT_R64_SFLOAT:
+        return 8;
+      case VK_FORMAT_R64G64_UINT:
+        return 8;
+      case VK_FORMAT_R64G64_SINT:
+        return 8;
+      case VK_FORMAT_R64G64_SFLOAT:
+        return 8;
+      case VK_FORMAT_R64G64B64_UINT:
+        return 8;
+      case VK_FORMAT_R64G64B64_SINT:
+        return 8;
+      case VK_FORMAT_R64G64B64_SFLOAT:
+        return 8;
+      case VK_FORMAT_R64G64B64A64_UINT:
+        return 8;
+      case VK_FORMAT_R64G64B64A64_SINT:
+        return 8;
+      case VK_FORMAT_R64G64B64A64_SFLOAT:
+        return 8;
+      case VK_FORMAT_B10G11R11_UFLOAT_PACK32:
+        return 4;
+      case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
+        return 4;
+      case VK_FORMAT_D16_UNORM:
+        return 2;
+      case VK_FORMAT_X8_D24_UNORM_PACK32:
+        return 4;
+      case VK_FORMAT_D32_SFLOAT:
+        return 4;
+      case VK_FORMAT_S8_UINT:
+        return 1;
+      case VK_FORMAT_D16_UNORM_S8_UINT:
+        return 2;
+      case VK_FORMAT_D24_UNORM_S8_UINT:
+        return 4;
+      case VK_FORMAT_D32_SFLOAT_S8_UINT:
+        return 4;
+      case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_BC2_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_BC2_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_BC3_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_BC3_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_BC4_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_BC4_SNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_BC5_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_BC5_SNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_BC6H_UFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_BC6H_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_BC7_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_BC7_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_EAC_R11_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_EAC_R11_SNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
+        return 1;
+      case VK_FORMAT_G8B8G8R8_422_UNORM:
+        return 1;
+      case VK_FORMAT_B8G8R8G8_422_UNORM:
+        return 1;
+      case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
+        return 1;
+      case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
+        return 1;
+      case VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM:
+        return 1;
+      case VK_FORMAT_G8_B8R8_2PLANE_422_UNORM:
+        return 1;
+      case VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM:
+        return 1;
+      case VK_FORMAT_R10X6_UNORM_PACK16:
+        return 2;
+      case VK_FORMAT_R10X6G10X6_UNORM_2PACK16:
+        return 2;
+      case VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16:
+        return 2;
+      case VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16:
+        return 2;
+      case VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16:
+        return 2;
+      case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_R12X4_UNORM_PACK16:
+        return 2;
+      case VK_FORMAT_R12X4G12X4_UNORM_2PACK16:
+        return 2;
+      case VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16:
+        return 2;
+      case VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16:
+        return 2;
+      case VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16:
+        return 2;
+      case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_G16B16G16R16_422_UNORM:
+        return 2;
+      case VK_FORMAT_B16G16R16G16_422_UNORM:
+        return 2;
+      case VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM:
+        return 2;
+      case VK_FORMAT_G16_B16R16_2PLANE_420_UNORM:
+        return 2;
+      case VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM:
+        return 2;
+      case VK_FORMAT_G16_B16R16_2PLANE_422_UNORM:
+        return 2;
+      case VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM:
+        return 2;
+      case VK_FORMAT_G8_B8R8_2PLANE_444_UNORM:
+        return 1;
+      case VK_FORMAT_G10X6_B10X6R10X6_2PLANE_444_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_G12X4_B12X4R12X4_2PLANE_444_UNORM_3PACK16:
+        return 2;
+      case VK_FORMAT_G16_B16R16_2PLANE_444_UNORM:
+        return 2;
+      case VK_FORMAT_A4R4G4B4_UNORM_PACK16:
+        return 2;
+      case VK_FORMAT_A4B4G4R4_UNORM_PACK16:
+        return 2;
+      case VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK:
+        return 1;
+      case VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG:
+        return 1;
+      case VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG:
+        return 1;
+      case VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG:
+        return 1;
+      case VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG:
+        return 1;
+      case VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG:
+        return 1;
+      case VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG:
+        return 1;
+      case VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG:
+        return 1;
+      case VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG:
+        return 1;
+      case VK_FORMAT_ASTC_3x3x3_UNORM_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_3x3x3_SRGB_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_3x3x3_SFLOAT_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_4x3x3_UNORM_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_4x3x3_SRGB_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_4x3x3_SFLOAT_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_4x4x3_UNORM_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_4x4x3_SRGB_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_4x4x3_SFLOAT_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_4x4x4_UNORM_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_4x4x4_SRGB_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_4x4x4_SFLOAT_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_5x4x4_UNORM_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_5x4x4_SRGB_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_5x4x4_SFLOAT_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_5x5x4_UNORM_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_5x5x4_SRGB_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_5x5x4_SFLOAT_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_5x5x5_UNORM_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_5x5x5_SRGB_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_5x5x5_SFLOAT_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_6x5x5_UNORM_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_6x5x5_SRGB_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_6x5x5_SFLOAT_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_6x6x5_UNORM_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_6x6x5_SRGB_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_6x6x5_SFLOAT_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_6x6x6_UNORM_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_6x6x6_SRGB_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_ASTC_6x6x6_SFLOAT_BLOCK_EXT:
+        return 1;
+      case VK_FORMAT_R16G16_S10_5_NV:
+        return 2;
+      case VK_FORMAT_A1B5G5R5_UNORM_PACK16_KHR:
+        return 2;
+      case VK_FORMAT_A8_UNORM_KHR:
+        return 1;
+      default:
+        return 0;
+    }
+}
diff --git a/thirdparty/libktx/patches/godot.patch b/thirdparty/libktx/patches/godot.patch
index 28db86cf9b..13468813fb 100644
--- a/thirdparty/libktx/patches/godot.patch
+++ b/thirdparty/libktx/patches/godot.patch
@@ -16,7 +16,7 @@ index ca68545e4a..d7ecb7a0fd 100644
  #undef DECLARE_PRIVATE
  #undef DECLARE_PROTECTED
 diff --git a/thirdparty/libktx/lib/dfdutils/vk2dfd.inl b/thirdparty/libktx/lib/dfdutils/vk2dfd.inl
-index 85d53202a5..25c7a2c238 100644
+index 5104c8fcb4..3398441e8c 100644
 --- a/thirdparty/libktx/lib/dfdutils/vk2dfd.inl
 +++ b/thirdparty/libktx/lib/dfdutils/vk2dfd.inl
 @@ -370,6 +370,7 @@ case VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG: return createDFDCompressed(c_PVRTC, 8
@@ -32,9 +32,9 @@ index 85d53202a5..25c7a2c238 100644
  case VK_FORMAT_ASTC_6x6x6_SRGB_BLOCK_EXT: return createDFDCompressed(c_ASTC, 6, 6, 6, s_SRGB);
  case VK_FORMAT_ASTC_6x6x6_SFLOAT_BLOCK_EXT: return createDFDCompressed(c_ASTC, 6, 6, 6, s_SFLOAT);
 +#endif
+ case VK_FORMAT_R16G16_S10_5_NV: return createDFDUnpacked(0, 2, 2, 0, s_S10_5);
  case VK_FORMAT_A1B5G5R5_UNORM_PACK16_KHR: {
      int channels[] = {0,1,2,3}; int bits[] = {5,5,5,1};
-     return createDFDPacked(0, 4, bits, channels, s_UNORM);
 diff --git a/thirdparty/libktx/lib/miniz_wrapper.cpp b/thirdparty/libktx/lib/miniz_wrapper.cpp
 index 07920c4809..cbd7da540a 100644
 --- a/thirdparty/libktx/lib/miniz_wrapper.cpp
diff --git a/thirdparty/mbedtls/include/mbedtls/aesni.h b/thirdparty/mbedtls/include/mbedtls/aesni.h
index b636c100ae..93f067304d 100644
--- a/thirdparty/mbedtls/include/mbedtls/aesni.h
+++ b/thirdparty/mbedtls/include/mbedtls/aesni.h
@@ -46,7 +46,7 @@
  *       macros that may change in future releases.
  */
 #undef MBEDTLS_AESNI_HAVE_INTRINSICS
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && !defined(__clang__)
 /* Visual Studio supports AESNI intrinsics since VS 2008 SP1. We only support
  * VS 2013 and up for other reasons anyway, so no need to check the version. */
 #define MBEDTLS_AESNI_HAVE_INTRINSICS
@@ -54,7 +54,7 @@
 /* GCC-like compilers: currently, we only support intrinsics if the requisite
  * target flag is enabled when building the library (e.g. `gcc -mpclmul -msse2`
  * or `clang -maes -mpclmul`). */
-#if defined(__GNUC__) && defined(__AES__) && defined(__PCLMUL__)
+#if (defined(__GNUC__) || defined(__clang__)) && defined(__AES__) && defined(__PCLMUL__)
 #define MBEDTLS_AESNI_HAVE_INTRINSICS
 #endif
 
diff --git a/thirdparty/mbedtls/include/mbedtls/config.h b/thirdparty/mbedtls/include/mbedtls/config.h
index ac2146ea11..4842fd494c 100644
--- a/thirdparty/mbedtls/include/mbedtls/config.h
+++ b/thirdparty/mbedtls/include/mbedtls/config.h
@@ -1571,6 +1571,26 @@
 //#define MBEDTLS_PSA_INJECT_ENTROPY
 
 /**
+ * \def MBEDTLS_PSA_ASSUME_EXCLUSIVE_BUFFERS
+ *
+ * Assume all buffers passed to PSA functions are owned exclusively by the
+ * PSA function and are not stored in shared memory.
+ *
+ * This option may be enabled if all buffers passed to any PSA function reside
+ * in memory that is accessible only to the PSA function during its execution.
+ *
+ * This option MUST be disabled whenever buffer arguments are in memory shared
+ * with an untrusted party, for example where arguments to PSA calls are passed
+ * across a trust boundary.
+ *
+ * \note Enabling this option reduces memory usage and code size.
+ *
+ * \note Enabling this option causes overlap of input and output buffers
+ *       not to be supported by PSA functions.
+ */
+//#define MBEDTLS_PSA_ASSUME_EXCLUSIVE_BUFFERS
+
+/**
  * \def MBEDTLS_RSA_NO_CRT
  *
  * Do not use the Chinese Remainder Theorem
diff --git a/thirdparty/mbedtls/include/mbedtls/ecp.h b/thirdparty/mbedtls/include/mbedtls/ecp.h
index e4e40c003c..33ea14d7e2 100644
--- a/thirdparty/mbedtls/include/mbedtls/ecp.h
+++ b/thirdparty/mbedtls/include/mbedtls/ecp.h
@@ -1265,6 +1265,8 @@ int mbedtls_ecp_gen_key(mbedtls_ecp_group_id grp_id, mbedtls_ecp_keypair *key,
 /**
  * \brief           This function reads an elliptic curve private key.
  *
+ * \note            This function does not support Curve448 yet.
+ *
  * \param grp_id    The ECP group identifier.
  * \param key       The destination key.
  * \param buf       The buffer containing the binary representation of the
@@ -1286,17 +1288,43 @@ int mbedtls_ecp_read_key(mbedtls_ecp_group_id grp_id, mbedtls_ecp_keypair *key,
 /**
  * \brief           This function exports an elliptic curve private key.
  *
+ * \note            Note that although this function accepts an output
+ *                  buffer that is smaller or larger than the key, most key
+ *                  import interfaces require the output to have exactly
+ *                  key's nominal length. It is generally simplest to
+ *                  pass the key's nominal length as \c buflen, after
+ *                  checking that the output buffer is large enough.
+ *                  See the description of the \p buflen parameter for
+ *                  how to calculate the nominal length.
+ *
+ * \note            If the private key was not set in \p key,
+ *                  the output is unspecified. Future versions
+ *                  may return an error in that case.
+ *
+ * \note            This function does not support Curve448 yet.
+ *
  * \param key       The private key.
  * \param buf       The output buffer for containing the binary representation
- *                  of the key. (Big endian integer for Weierstrass curves, byte
- *                  string for Montgomery curves.)
+ *                  of the key.
+ *                  For Weierstrass curves, this is the big-endian
+ *                  representation, padded with null bytes at the beginning
+ *                  to reach \p buflen bytes.
+ *                  For Montgomery curves, this is the standard byte string
+ *                  representation (which is little-endian), padded with
+ *                  null bytes at the end to reach \p buflen bytes.
  * \param buflen    The total length of the buffer in bytes.
+ *                  The length of the output is
+ *                  (`grp->nbits` + 7) / 8 bytes
+ *                  where `grp->nbits` is the private key size in bits.
+ *                  For Weierstrass keys, if the output buffer is smaller,
+ *                  leading zeros are trimmed to fit if possible. For
+ *                  Montgomery keys, the output buffer must always be large
+ *                  enough for the nominal length.
  *
  * \return          \c 0 on success.
- * \return          #MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL if the \p key
-                    representation is larger than the available space in \p buf.
- * \return          #MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE if the operation for
- *                  the group is not implemented.
+ * \return          #MBEDTLS_ERR_ECP_BUFFER_TOO_SMALL or
+ *                  #MBEDTLS_ERR_MPI_BUFFER_TOO_SMALL if the \p key
+ *                  representation is larger than the available space in \p buf.
  * \return          Another negative error code on different kinds of failure.
  */
 int mbedtls_ecp_write_key(mbedtls_ecp_keypair *key,
diff --git a/thirdparty/mbedtls/include/mbedtls/net_sockets.h b/thirdparty/mbedtls/include/mbedtls/net_sockets.h
index 2d3fe3f949..1a12c9c803 100644
--- a/thirdparty/mbedtls/include/mbedtls/net_sockets.h
+++ b/thirdparty/mbedtls/include/mbedtls/net_sockets.h
@@ -140,7 +140,7 @@ int mbedtls_net_bind(mbedtls_net_context *ctx, const char *bind_ip, const char *
  * \param client_ctx Will contain the connected client socket
  * \param client_ip Will contain the client IP address, can be NULL
  * \param buf_size  Size of the client_ip buffer
- * \param ip_len    Will receive the size of the client IP written,
+ * \param cip_len   Will receive the size of the client IP written,
  *                  can be NULL if client_ip is null
  *
  * \return          0 if successful, or
@@ -153,7 +153,7 @@ int mbedtls_net_bind(mbedtls_net_context *ctx, const char *bind_ip, const char *
  */
 int mbedtls_net_accept(mbedtls_net_context *bind_ctx,
                        mbedtls_net_context *client_ctx,
-                       void *client_ip, size_t buf_size, size_t *ip_len);
+                       void *client_ip, size_t buf_size, size_t *cip_len);
 
 /**
  * \brief          Check and wait for the context to be ready for read/write
diff --git a/thirdparty/mbedtls/include/mbedtls/version.h b/thirdparty/mbedtls/include/mbedtls/version.h
index 0533bca681..bbe76b1739 100644
--- a/thirdparty/mbedtls/include/mbedtls/version.h
+++ b/thirdparty/mbedtls/include/mbedtls/version.h
@@ -26,16 +26,16 @@
  */
 #define MBEDTLS_VERSION_MAJOR  2
 #define MBEDTLS_VERSION_MINOR  28
-#define MBEDTLS_VERSION_PATCH  7
+#define MBEDTLS_VERSION_PATCH  8
 
 /**
  * The single version number has the following structure:
  *    MMNNPP00
  *    Major version | Minor version | Patch version
  */
-#define MBEDTLS_VERSION_NUMBER         0x021C0700
-#define MBEDTLS_VERSION_STRING         "2.28.7"
-#define MBEDTLS_VERSION_STRING_FULL    "Mbed TLS 2.28.7"
+#define MBEDTLS_VERSION_NUMBER         0x021C0800
+#define MBEDTLS_VERSION_STRING         "2.28.8"
+#define MBEDTLS_VERSION_STRING_FULL    "Mbed TLS 2.28.8"
 
 #if defined(MBEDTLS_VERSION_C)
 
diff --git a/thirdparty/mbedtls/library/aes.c b/thirdparty/mbedtls/library/aes.c
index 24d7ab92fb..836367cea7 100644
--- a/thirdparty/mbedtls/library/aes.c
+++ b/thirdparty/mbedtls/library/aes.c
@@ -322,7 +322,7 @@ static const uint32_t RT3[256] = { RT };
 /*
  * Round constants
  */
-static const uint32_t RCON[10] =
+static const uint32_t round_constants[10] =
 {
     0x00000001, 0x00000002, 0x00000004, 0x00000008,
     0x00000010, 0x00000020, 0x00000040, 0x00000080,
@@ -369,7 +369,7 @@ static uint32_t RT3[256];
 /*
  * Round constants
  */
-static uint32_t RCON[10];
+static uint32_t round_constants[10];
 
 /*
  * Tables generation code
@@ -399,7 +399,7 @@ static void aes_gen_tables(void)
      * calculate the round constants
      */
     for (i = 0, x = 1; i < 10; i++) {
-        RCON[i] = (uint32_t) x;
+        round_constants[i] = (uint32_t) x;
         x = MBEDTLS_BYTE_0(XTIME(x));
     }
 
@@ -625,7 +625,7 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key,
         case 10:
 
             for (i = 0; i < 10; i++, RK += 4) {
-                RK[4]  = RK[0] ^ RCON[i] ^
+                RK[4]  = RK[0] ^ round_constants[i] ^
                          ((uint32_t) FSb[MBEDTLS_BYTE_1(RK[3])]) ^
                          ((uint32_t) FSb[MBEDTLS_BYTE_2(RK[3])] <<  8) ^
                          ((uint32_t) FSb[MBEDTLS_BYTE_3(RK[3])] << 16) ^
@@ -640,7 +640,7 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key,
         case 12:
 
             for (i = 0; i < 8; i++, RK += 6) {
-                RK[6]  = RK[0] ^ RCON[i] ^
+                RK[6]  = RK[0] ^ round_constants[i] ^
                          ((uint32_t) FSb[MBEDTLS_BYTE_1(RK[5])]) ^
                          ((uint32_t) FSb[MBEDTLS_BYTE_2(RK[5])] <<  8) ^
                          ((uint32_t) FSb[MBEDTLS_BYTE_3(RK[5])] << 16) ^
@@ -657,7 +657,7 @@ int mbedtls_aes_setkey_enc(mbedtls_aes_context *ctx, const unsigned char *key,
         case 14:
 
             for (i = 0; i < 7; i++, RK += 8) {
-                RK[8]  = RK[0] ^ RCON[i] ^
+                RK[8]  = RK[0] ^ round_constants[i] ^
                          ((uint32_t) FSb[MBEDTLS_BYTE_1(RK[7])]) ^
                          ((uint32_t) FSb[MBEDTLS_BYTE_2(RK[7])] <<  8) ^
                          ((uint32_t) FSb[MBEDTLS_BYTE_3(RK[7])] << 16) ^
diff --git a/thirdparty/mbedtls/library/aesni.c b/thirdparty/mbedtls/library/aesni.c
index dd84c2b4ea..74bae91f5e 100644
--- a/thirdparty/mbedtls/library/aesni.c
+++ b/thirdparty/mbedtls/library/aesni.c
@@ -27,10 +27,12 @@
 #if defined(MBEDTLS_AESNI_HAVE_CODE)
 
 #if MBEDTLS_AESNI_HAVE_CODE == 2
-#if !defined(_WIN32)
+#if defined(__GNUC__)
 #include <cpuid.h>
-#else
+#elif defined(_MSC_VER)
 #include <intrin.h>
+#else
+#error "`__cpuid` required by MBEDTLS_AESNI_C is not supported by the compiler"
 #endif
 #include <immintrin.h>
 #endif
@@ -45,7 +47,7 @@ int mbedtls_aesni_has_support(unsigned int what)
 
     if (!done) {
 #if MBEDTLS_AESNI_HAVE_CODE == 2
-        static unsigned info[4] = { 0, 0, 0, 0 };
+        static int info[4] = { 0, 0, 0, 0 };
 #if defined(_MSC_VER)
         __cpuid(info, 1);
 #else
@@ -179,7 +181,7 @@ void mbedtls_aesni_gcm_mult(unsigned char c[16],
                             const unsigned char a[16],
                             const unsigned char b[16])
 {
-    __m128i aa, bb, cc, dd;
+    __m128i aa = { 0 }, bb = { 0 }, cc, dd;
 
     /* The inputs are in big-endian order, so byte-reverse them */
     for (size_t i = 0; i < 16; i++) {
diff --git a/thirdparty/mbedtls/library/common.h b/thirdparty/mbedtls/library/common.h
index bf18d725cc..49e2c97ea0 100644
--- a/thirdparty/mbedtls/library/common.h
+++ b/thirdparty/mbedtls/library/common.h
@@ -350,4 +350,31 @@ static inline const unsigned char *mbedtls_buffer_offset_const(
 #define MBEDTLS_STATIC_ASSERT(expr, msg)
 #endif
 
+/* Suppress compiler warnings for unused functions and variables. */
+#if !defined(MBEDTLS_MAYBE_UNUSED) && defined(__has_attribute)
+#    if __has_attribute(unused)
+#        define MBEDTLS_MAYBE_UNUSED __attribute__((unused))
+#    endif
+#endif
+#if !defined(MBEDTLS_MAYBE_UNUSED) && defined(__GNUC__)
+#    define MBEDTLS_MAYBE_UNUSED __attribute__((unused))
+#endif
+#if !defined(MBEDTLS_MAYBE_UNUSED) && defined(__IAR_SYSTEMS_ICC__) && defined(__VER__)
+/* IAR does support __attribute__((unused)), but only if the -e flag (extended language support)
+ * is given; the pragma always works.
+ * Unfortunately the pragma affects the rest of the file where it is used, but this is harmless.
+ * Check for version 5.2 or later - this pragma may be supported by earlier versions, but I wasn't
+ * able to find documentation).
+ */
+#    if (__VER__ >= 5020000)
+#        define MBEDTLS_MAYBE_UNUSED _Pragma("diag_suppress=Pe177")
+#    endif
+#endif
+#if !defined(MBEDTLS_MAYBE_UNUSED) && defined(_MSC_VER)
+#    define MBEDTLS_MAYBE_UNUSED __pragma(warning(suppress:4189))
+#endif
+#if !defined(MBEDTLS_MAYBE_UNUSED)
+#    define MBEDTLS_MAYBE_UNUSED
+#endif
+
 #endif /* MBEDTLS_LIBRARY_COMMON_H */
diff --git a/thirdparty/mbedtls/library/ecp.c b/thirdparty/mbedtls/library/ecp.c
index 31a6b9e305..cfe02b0d2c 100644
--- a/thirdparty/mbedtls/library/ecp.c
+++ b/thirdparty/mbedtls/library/ecp.c
@@ -927,7 +927,7 @@ int mbedtls_ecp_point_read_binary(const mbedtls_ecp_group *grp,
     size_t plen;
     ECP_VALIDATE_RET(grp != NULL);
     ECP_VALIDATE_RET(pt  != NULL);
-    ECP_VALIDATE_RET(buf != NULL);
+    ECP_VALIDATE_RET(ilen == 0 || buf != NULL);
 
     if (ilen < 1) {
         return MBEDTLS_ERR_ECP_BAD_INPUT_DATA;
@@ -996,7 +996,7 @@ int mbedtls_ecp_tls_read_point(const mbedtls_ecp_group *grp,
     ECP_VALIDATE_RET(grp != NULL);
     ECP_VALIDATE_RET(pt  != NULL);
     ECP_VALIDATE_RET(buf != NULL);
-    ECP_VALIDATE_RET(*buf != NULL);
+    ECP_VALIDATE_RET(buf_len == 0 || *buf != NULL);
 
     /*
      * We must have at least two bytes (1 for length, at least one for data)
@@ -1068,7 +1068,7 @@ int mbedtls_ecp_tls_read_group(mbedtls_ecp_group *grp,
     mbedtls_ecp_group_id grp_id;
     ECP_VALIDATE_RET(grp  != NULL);
     ECP_VALIDATE_RET(buf  != NULL);
-    ECP_VALIDATE_RET(*buf != NULL);
+    ECP_VALIDATE_RET(len == 0 || *buf != NULL);
 
     if ((ret = mbedtls_ecp_tls_read_group_id(&grp_id, buf, len)) != 0) {
         return ret;
@@ -1088,7 +1088,7 @@ int mbedtls_ecp_tls_read_group_id(mbedtls_ecp_group_id *grp,
     const mbedtls_ecp_curve_info *curve_info;
     ECP_VALIDATE_RET(grp  != NULL);
     ECP_VALIDATE_RET(buf  != NULL);
-    ECP_VALIDATE_RET(*buf != NULL);
+    ECP_VALIDATE_RET(len == 0 || *buf != NULL);
 
     /*
      * We expect at least three bytes (see below)
@@ -2614,8 +2614,8 @@ static int ecp_mul_mxz(mbedtls_ecp_group *grp, mbedtls_ecp_point *R,
     /* RP.X might be slightly larger than P, so reduce it */
     MOD_ADD(RP.X);
 
+    /* Randomize coordinates of the starting point */
 #if defined(MBEDTLS_ECP_NO_INTERNAL_RNG)
-    /* Derandomize coordinates of the starting point */
     if (f_rng == NULL) {
         have_rng = 0;
     }
@@ -3358,10 +3358,10 @@ cleanup:
 int mbedtls_ecp_write_key(mbedtls_ecp_keypair *key,
                           unsigned char *buf, size_t buflen)
 {
-    int ret = MBEDTLS_ERR_ECP_FEATURE_UNAVAILABLE;
+    int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
 
     ECP_VALIDATE_RET(key != NULL);
-    ECP_VALIDATE_RET(buf != NULL);
+    ECP_VALIDATE_RET(buflen == 0 || buf != NULL);
 
 #if defined(MBEDTLS_ECP_MONTGOMERY_ENABLED)
     if (mbedtls_ecp_get_type(&key->grp) == MBEDTLS_ECP_TYPE_MONTGOMERY) {
diff --git a/thirdparty/mbedtls/library/ecp_curves.c b/thirdparty/mbedtls/library/ecp_curves.c
index c7565cce5d..61a1046f3a 100644
--- a/thirdparty/mbedtls/library/ecp_curves.c
+++ b/thirdparty/mbedtls/library/ecp_curves.c
@@ -535,10 +535,10 @@ static inline void ecp_mpi_load(mbedtls_mpi *X, const mbedtls_mpi_uint *p, size_
  */
 static inline void ecp_mpi_set1(mbedtls_mpi *X)
 {
-    static mbedtls_mpi_uint one[] = { 1 };
+    static const mbedtls_mpi_uint one[] = { 1 };
     X->s = 1;
     X->n = 1;
-    X->p = one;
+    X->p = (mbedtls_mpi_uint *) one; /* X->p will not be modified so the cast is safe */
 }
 
 /*
@@ -1348,7 +1348,7 @@ cleanup:
  */
 #define P_KOBLITZ_MAX   (256 / 8 / sizeof(mbedtls_mpi_uint))      // Max limbs in P
 #define P_KOBLITZ_R     (8 / sizeof(mbedtls_mpi_uint))            // Limbs in R
-static inline int ecp_mod_koblitz(mbedtls_mpi *N, mbedtls_mpi_uint *Rp, size_t p_limbs,
+static inline int ecp_mod_koblitz(mbedtls_mpi *N, const mbedtls_mpi_uint *Rp, size_t p_limbs,
                                   size_t adjust, size_t shift, mbedtls_mpi_uint mask)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
@@ -1362,7 +1362,7 @@ static inline int ecp_mod_koblitz(mbedtls_mpi *N, mbedtls_mpi_uint *Rp, size_t p
 
     /* Init R */
     R.s = 1;
-    R.p = Rp;
+    R.p = (mbedtls_mpi_uint *) Rp; /* R.p will not be modified so the cast is safe */
     R.n = P_KOBLITZ_R;
 
     /* Common setup for M */
@@ -1433,7 +1433,7 @@ cleanup:
  */
 static int ecp_mod_p192k1(mbedtls_mpi *N)
 {
-    static mbedtls_mpi_uint Rp[] = {
+    static const mbedtls_mpi_uint Rp[] = {
         MBEDTLS_BYTES_TO_T_UINT_8(0xC9, 0x11, 0x00, 0x00, 0x01, 0x00, 0x00,
                                   0x00)
     };
@@ -1450,7 +1450,7 @@ static int ecp_mod_p192k1(mbedtls_mpi *N)
  */
 static int ecp_mod_p224k1(mbedtls_mpi *N)
 {
-    static mbedtls_mpi_uint Rp[] = {
+    static const mbedtls_mpi_uint Rp[] = {
         MBEDTLS_BYTES_TO_T_UINT_8(0x93, 0x1A, 0x00, 0x00, 0x01, 0x00, 0x00,
                                   0x00)
     };
@@ -1472,7 +1472,7 @@ static int ecp_mod_p224k1(mbedtls_mpi *N)
  */
 static int ecp_mod_p256k1(mbedtls_mpi *N)
 {
-    static mbedtls_mpi_uint Rp[] = {
+    static const mbedtls_mpi_uint Rp[] = {
         MBEDTLS_BYTES_TO_T_UINT_8(0xD1, 0x03, 0x00, 0x00, 0x01, 0x00, 0x00,
                                   0x00)
     };
diff --git a/thirdparty/mbedtls/library/entropy_poll.c b/thirdparty/mbedtls/library/entropy_poll.c
index 4c5184686e..727f848b93 100644
--- a/thirdparty/mbedtls/library/entropy_poll.c
+++ b/thirdparty/mbedtls/library/entropy_poll.c
@@ -5,7 +5,7 @@
  *  SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
  */
 
-#if defined(__linux__) && !defined(_GNU_SOURCE)
+#if defined(__linux__) || defined(__midipix__) && !defined(_GNU_SOURCE)
 /* Ensure that syscall() is available even when compiling with -std=c99 */
 #define _GNU_SOURCE
 #endif
diff --git a/thirdparty/mbedtls/library/gcm.c b/thirdparty/mbedtls/library/gcm.c
index 86d5fa2b5f..d3e773278f 100644
--- a/thirdparty/mbedtls/library/gcm.c
+++ b/thirdparty/mbedtls/library/gcm.c
@@ -241,7 +241,7 @@ int mbedtls_gcm_starts(mbedtls_gcm_context *ctx,
     uint64_t iv_bits;
 
     GCM_VALIDATE_RET(ctx != NULL);
-    GCM_VALIDATE_RET(iv != NULL);
+    GCM_VALIDATE_RET(iv_len == 0 || iv != NULL);
     GCM_VALIDATE_RET(add_len == 0 || add != NULL);
 
     /* IV and AD are limited to 2^64 bits, so 2^61 bytes */
@@ -433,7 +433,7 @@ int mbedtls_gcm_crypt_and_tag(mbedtls_gcm_context *ctx,
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
 
     GCM_VALIDATE_RET(ctx != NULL);
-    GCM_VALIDATE_RET(iv != NULL);
+    GCM_VALIDATE_RET(iv_len == 0 || iv != NULL);
     GCM_VALIDATE_RET(add_len == 0 || add != NULL);
     GCM_VALIDATE_RET(length == 0 || input != NULL);
     GCM_VALIDATE_RET(length == 0 || output != NULL);
@@ -470,7 +470,7 @@ int mbedtls_gcm_auth_decrypt(mbedtls_gcm_context *ctx,
     int diff;
 
     GCM_VALIDATE_RET(ctx != NULL);
-    GCM_VALIDATE_RET(iv != NULL);
+    GCM_VALIDATE_RET(iv_len == 0 || iv != NULL);
     GCM_VALIDATE_RET(add_len == 0 || add != NULL);
     GCM_VALIDATE_RET(tag != NULL);
     GCM_VALIDATE_RET(length == 0 || input != NULL);
diff --git a/thirdparty/mbedtls/library/net_sockets.c b/thirdparty/mbedtls/library/net_sockets.c
index 8140eeade4..5d985ef001 100644
--- a/thirdparty/mbedtls/library/net_sockets.c
+++ b/thirdparty/mbedtls/library/net_sockets.c
@@ -321,7 +321,7 @@ static int net_would_block(const mbedtls_net_context *ctx)
  */
 int mbedtls_net_accept(mbedtls_net_context *bind_ctx,
                        mbedtls_net_context *client_ctx,
-                       void *client_ip, size_t buf_size, size_t *ip_len)
+                       void *client_ip, size_t buf_size, size_t *cip_len)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
     int type;
@@ -404,22 +404,22 @@ int mbedtls_net_accept(mbedtls_net_context *bind_ctx,
     if (client_ip != NULL) {
         if (client_addr.ss_family == AF_INET) {
             struct sockaddr_in *addr4 = (struct sockaddr_in *) &client_addr;
-            *ip_len = sizeof(addr4->sin_addr.s_addr);
+            *cip_len = sizeof(addr4->sin_addr.s_addr);
 
-            if (buf_size < *ip_len) {
+            if (buf_size < *cip_len) {
                 return MBEDTLS_ERR_NET_BUFFER_TOO_SMALL;
             }
 
-            memcpy(client_ip, &addr4->sin_addr.s_addr, *ip_len);
+            memcpy(client_ip, &addr4->sin_addr.s_addr, *cip_len);
         } else {
             struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *) &client_addr;
-            *ip_len = sizeof(addr6->sin6_addr.s6_addr);
+            *cip_len = sizeof(addr6->sin6_addr.s6_addr);
 
-            if (buf_size < *ip_len) {
+            if (buf_size < *cip_len) {
                 return MBEDTLS_ERR_NET_BUFFER_TOO_SMALL;
             }
 
-            memcpy(client_ip, &addr6->sin6_addr.s6_addr, *ip_len);
+            memcpy(client_ip, &addr6->sin6_addr.s6_addr, *cip_len);
         }
     }
 
diff --git a/thirdparty/mbedtls/library/pk_wrap.c b/thirdparty/mbedtls/library/pk_wrap.c
index 14c6d3f99c..dd460a6a0c 100644
--- a/thirdparty/mbedtls/library/pk_wrap.c
+++ b/thirdparty/mbedtls/library/pk_wrap.c
@@ -53,7 +53,23 @@ static int rsa_can_do(mbedtls_pk_type_t type)
 static size_t rsa_get_bitlen(const void *ctx)
 {
     const mbedtls_rsa_context *rsa = (const mbedtls_rsa_context *) ctx;
-    return 8 * mbedtls_rsa_get_len(rsa);
+    /* Unfortunately, the rsa.h interface does not have a direct way
+     * to access the bit-length that works with MBEDTLS_RSA_ALT.
+     * So we have to do a little work here.
+     */
+    mbedtls_mpi N;
+    mbedtls_mpi_init(&N);
+    int ret = mbedtls_rsa_export(rsa, &N, NULL, NULL, NULL, NULL);
+    /* If the export fails for some reason (e.g. the RSA_ALT implementation
+     * does not support export, or there is not enough memory),
+     * we have no way of returning an error from this function.
+     * As a fallback, return the byte-length converted in bits, which is
+     * the correct  value if the modulus size is a multiple of 8 bits, which
+     * is very often the case in practice. */
+    size_t bitlen = (ret == 0 ? mbedtls_mpi_bitlen(&N) :
+                     8 * mbedtls_rsa_get_len(rsa));
+    mbedtls_mpi_free(&N);
+    return bitlen;
 }
 
 static int rsa_verify_wrap(void *ctx, mbedtls_md_type_t md_alg,
diff --git a/thirdparty/mbedtls/library/pkcs12.c b/thirdparty/mbedtls/library/pkcs12.c
index 712488233f..55de216edb 100644
--- a/thirdparty/mbedtls/library/pkcs12.c
+++ b/thirdparty/mbedtls/library/pkcs12.c
@@ -244,21 +244,22 @@ int mbedtls_pkcs12_pbe_ext(mbedtls_asn1_buf *pbe_params, int mode,
     }
 
 #if defined(MBEDTLS_CIPHER_MODE_WITH_PADDING)
-    /* PKCS12 uses CBC with PKCS7 padding */
-
-    mbedtls_cipher_padding_t padding = MBEDTLS_PADDING_PKCS7;
+    {
+        /* PKCS12 uses CBC with PKCS7 padding */
+        mbedtls_cipher_padding_t padding = MBEDTLS_PADDING_PKCS7;
 #if !defined(MBEDTLS_CIPHER_PADDING_PKCS7)
-    /* For historical reasons, when decrypting, this function works when
-     * decrypting even when support for PKCS7 padding is disabled. In this
-     * case, it ignores the padding, and so will never report a
-     * password mismatch.
-     */
-    if (mode == MBEDTLS_PKCS12_PBE_DECRYPT) {
-        padding = MBEDTLS_PADDING_NONE;
-    }
+        /* For historical reasons, when decrypting, this function works when
+         * decrypting even when support for PKCS7 padding is disabled. In this
+         * case, it ignores the padding, and so will never report a
+         * password mismatch.
+         */
+        if (mode == MBEDTLS_PKCS12_PBE_DECRYPT) {
+            padding = MBEDTLS_PADDING_NONE;
+        }
 #endif
-    if ((ret = mbedtls_cipher_set_padding_mode(&cipher_ctx, padding)) != 0) {
-        goto exit;
+        if ((ret = mbedtls_cipher_set_padding_mode(&cipher_ctx, padding)) != 0) {
+            goto exit;
+        }
     }
 #endif /* MBEDTLS_CIPHER_MODE_WITH_PADDING */
 
diff --git a/thirdparty/mbedtls/library/pkcs5.c b/thirdparty/mbedtls/library/pkcs5.c
index 8e5b751a38..90703c45f9 100644
--- a/thirdparty/mbedtls/library/pkcs5.c
+++ b/thirdparty/mbedtls/library/pkcs5.c
@@ -239,23 +239,25 @@ int mbedtls_pkcs5_pbes2_ext(const mbedtls_asn1_buf *pbe_params, int mode,
     }
 
 #if defined(MBEDTLS_CIPHER_MODE_WITH_PADDING)
-    /* PKCS5 uses CBC with PKCS7 padding (which is the same as
-     * "PKCS5 padding" except that it's typically only called PKCS5
-     * with 64-bit-block ciphers).
-     */
-    mbedtls_cipher_padding_t padding = MBEDTLS_PADDING_PKCS7;
+    {
+        /* PKCS5 uses CBC with PKCS7 padding (which is the same as
+         * "PKCS5 padding" except that it's typically only called PKCS5
+         * with 64-bit-block ciphers).
+         */
+        mbedtls_cipher_padding_t padding = MBEDTLS_PADDING_PKCS7;
 #if !defined(MBEDTLS_CIPHER_PADDING_PKCS7)
-    /* For historical reasons, when decrypting, this function works when
-     * decrypting even when support for PKCS7 padding is disabled. In this
-     * case, it ignores the padding, and so will never report a
-     * password mismatch.
-     */
-    if (mode == MBEDTLS_DECRYPT) {
-        padding = MBEDTLS_PADDING_NONE;
-    }
+        /* For historical reasons, when decrypting, this function works when
+         * decrypting even when support for PKCS7 padding is disabled. In this
+         * case, it ignores the padding, and so will never report a
+         * password mismatch.
+         */
+        if (mode == MBEDTLS_DECRYPT) {
+            padding = MBEDTLS_PADDING_NONE;
+        }
 #endif
-    if ((ret = mbedtls_cipher_set_padding_mode(&cipher_ctx, padding)) != 0) {
-        goto exit;
+        if ((ret = mbedtls_cipher_set_padding_mode(&cipher_ctx, padding)) != 0) {
+            goto exit;
+        }
     }
 #endif /* MBEDTLS_CIPHER_MODE_WITH_PADDING */
     if ((ret = mbedtls_cipher_crypt(&cipher_ctx, iv, enc_scheme_params.len,
diff --git a/thirdparty/mbedtls/library/pkwrite.c b/thirdparty/mbedtls/library/pkwrite.c
index fafcf0e1a7..534290df4e 100644
--- a/thirdparty/mbedtls/library/pkwrite.c
+++ b/thirdparty/mbedtls/library/pkwrite.c
@@ -559,38 +559,49 @@ end_of_export:
 int mbedtls_pk_write_pubkey_pem(mbedtls_pk_context *key, unsigned char *buf, size_t size)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    unsigned char output_buf[PUB_DER_MAX_BYTES];
+    unsigned char *output_buf = NULL;
+    output_buf = mbedtls_calloc(1, PUB_DER_MAX_BYTES);
+    if (output_buf == NULL) {
+        return MBEDTLS_ERR_PK_ALLOC_FAILED;
+    }
     size_t olen = 0;
 
     PK_VALIDATE_RET(key != NULL);
     PK_VALIDATE_RET(buf != NULL || size == 0);
 
     if ((ret = mbedtls_pk_write_pubkey_der(key, output_buf,
-                                           sizeof(output_buf))) < 0) {
-        return ret;
+                                           PUB_DER_MAX_BYTES)) < 0) {
+        goto cleanup;
     }
 
     if ((ret = mbedtls_pem_write_buffer(PEM_BEGIN_PUBLIC_KEY, PEM_END_PUBLIC_KEY,
-                                        output_buf + sizeof(output_buf) - ret,
+                                        output_buf + PUB_DER_MAX_BYTES - ret,
                                         ret, buf, size, &olen)) != 0) {
-        return ret;
+        goto cleanup;
     }
 
-    return 0;
+    ret = 0;
+cleanup:
+    mbedtls_free(output_buf);
+    return ret;
 }
 
 int mbedtls_pk_write_key_pem(mbedtls_pk_context *key, unsigned char *buf, size_t size)
 {
     int ret = MBEDTLS_ERR_ERROR_CORRUPTION_DETECTED;
-    unsigned char output_buf[PRV_DER_MAX_BYTES];
+    unsigned char *output_buf = NULL;
+    output_buf = mbedtls_calloc(1, PRV_DER_MAX_BYTES);
+    if (output_buf == NULL) {
+        return MBEDTLS_ERR_PK_ALLOC_FAILED;
+    }
     const char *begin, *end;
     size_t olen = 0;
 
     PK_VALIDATE_RET(key != NULL);
     PK_VALIDATE_RET(buf != NULL || size == 0);
 
-    if ((ret = mbedtls_pk_write_key_der(key, output_buf, sizeof(output_buf))) < 0) {
-        return ret;
+    if ((ret = mbedtls_pk_write_key_der(key, output_buf, PRV_DER_MAX_BYTES)) < 0) {
+        goto cleanup;
     }
 
 #if defined(MBEDTLS_RSA_C)
@@ -605,15 +616,22 @@ int mbedtls_pk_write_key_pem(mbedtls_pk_context *key, unsigned char *buf, size_t
         end = PEM_END_PRIVATE_KEY_EC;
     } else
 #endif
-    return MBEDTLS_ERR_PK_FEATURE_UNAVAILABLE;
+    {
+        ret = MBEDTLS_ERR_PK_FEATURE_UNAVAILABLE;
+        goto cleanup;
+    }
 
     if ((ret = mbedtls_pem_write_buffer(begin, end,
-                                        output_buf + sizeof(output_buf) - ret,
+                                        output_buf + PRV_DER_MAX_BYTES - ret,
                                         ret, buf, size, &olen)) != 0) {
-        return ret;
+        goto cleanup;
     }
 
-    return 0;
+    ret = 0;
+cleanup:
+    mbedtls_platform_zeroize(output_buf, PRV_DER_MAX_BYTES);
+    mbedtls_free(output_buf);
+    return ret;
 }
 #endif /* MBEDTLS_PEM_WRITE_C */
 
diff --git a/thirdparty/mbedtls/library/platform_util.c b/thirdparty/mbedtls/library/platform_util.c
index a86b07fa3f..df34167a8f 100644
--- a/thirdparty/mbedtls/library/platform_util.c
+++ b/thirdparty/mbedtls/library/platform_util.c
@@ -66,10 +66,10 @@ void mbedtls_platform_zeroize(void *buf, size_t len)
 #include <time.h>
 #if !defined(_WIN32) && (defined(unix) || \
     defined(__unix) || defined(__unix__) || (defined(__APPLE__) && \
-    defined(__MACH__)))
+    defined(__MACH__)) || defined(__midipix__))
 #include <unistd.h>
 #endif /* !_WIN32 && (unix || __unix || __unix__ ||
-        * (__APPLE__ && __MACH__)) */
+        * (__APPLE__ && __MACH__)) || __midipix__ */
 
 #if !((defined(_POSIX_VERSION) && _POSIX_VERSION >= 200809L) ||     \
     (defined(_POSIX_THREAD_SAFE_FUNCTIONS) &&                     \
diff --git a/thirdparty/mbedtls/library/ssl_tls.c b/thirdparty/mbedtls/library/ssl_tls.c
index 1a2bc7bc9e..c667a2923b 100644
--- a/thirdparty/mbedtls/library/ssl_tls.c
+++ b/thirdparty/mbedtls/library/ssl_tls.c
@@ -992,8 +992,7 @@ static int ssl_populate_transform(mbedtls_ssl_transform *transform,
     !defined(MBEDTLS_SSL_EXPORT_KEYS) && \
     !defined(MBEDTLS_SSL_DTLS_CONNECTION_ID) && \
     !defined(MBEDTLS_DEBUG_C)
-    ssl = NULL; /* make sure we don't use it except for those cases */
-    (void) ssl;
+    (void) ssl; /* ssl is unused except for those cases */
 #endif
 
     /*
@@ -5205,6 +5204,12 @@ const mbedtls_ssl_session *mbedtls_ssl_get_session_pointer(const mbedtls_ssl_con
 #define SSL_SERIALIZED_SESSION_CONFIG_CRT 0
 #endif /* MBEDTLS_X509_CRT_PARSE_C */
 
+#if defined(MBEDTLS_SSL_KEEP_PEER_CERTIFICATE)
+#define SSL_SERIALIZED_SESSION_CONFIG_KEEP_PEER_CRT 1
+#else
+#define SSL_SERIALIZED_SESSION_CONFIG_KEEP_PEER_CRT 0
+#endif /* MBEDTLS_SSL_SESSION_TICKETS */
+
 #if defined(MBEDTLS_SSL_CLI_C) && defined(MBEDTLS_SSL_SESSION_TICKETS)
 #define SSL_SERIALIZED_SESSION_CONFIG_CLIENT_TICKET 1
 #else
@@ -5242,6 +5247,7 @@ const mbedtls_ssl_session *mbedtls_ssl_get_session_pointer(const mbedtls_ssl_con
 #define SSL_SERIALIZED_SESSION_CONFIG_TRUNC_HMAC_BIT    4
 #define SSL_SERIALIZED_SESSION_CONFIG_ETM_BIT           5
 #define SSL_SERIALIZED_SESSION_CONFIG_TICKET_BIT        6
+#define SSL_SERIALIZED_SESSION_CONFIG_KEEP_PEER_CRT_BIT 7
 
 #define SSL_SERIALIZED_SESSION_CONFIG_BITFLAG                           \
     ((uint16_t) (                                                      \
@@ -5253,9 +5259,11 @@ const mbedtls_ssl_session *mbedtls_ssl_get_session_pointer(const mbedtls_ssl_con
          (SSL_SERIALIZED_SESSION_CONFIG_TRUNC_HMAC << \
              SSL_SERIALIZED_SESSION_CONFIG_TRUNC_HMAC_BIT) | \
          (SSL_SERIALIZED_SESSION_CONFIG_ETM << SSL_SERIALIZED_SESSION_CONFIG_ETM_BIT) | \
-         (SSL_SERIALIZED_SESSION_CONFIG_TICKET << SSL_SERIALIZED_SESSION_CONFIG_TICKET_BIT)))
+         (SSL_SERIALIZED_SESSION_CONFIG_TICKET << SSL_SERIALIZED_SESSION_CONFIG_TICKET_BIT) | \
+         (SSL_SERIALIZED_SESSION_CONFIG_KEEP_PEER_CRT << \
+             SSL_SERIALIZED_SESSION_CONFIG_KEEP_PEER_CRT_BIT)))
 
-static unsigned char ssl_serialized_session_header[] = {
+static const unsigned char ssl_serialized_session_header[] = {
     MBEDTLS_VERSION_MAJOR,
     MBEDTLS_VERSION_MINOR,
     MBEDTLS_VERSION_PATCH,
@@ -5279,19 +5287,36 @@ static unsigned char ssl_serialized_session_header[] = {
  *                               // the setting of those compile-time
  *                               // configuration options which influence
  *                               // the structure of mbedtls_ssl_session.
- *  uint64 start_time;
- *  uint8 ciphersuite[2];        // defined by the standard
- *  uint8 compression;           // 0 or 1
- *  uint8 session_id_len;        // at most 32
- *  opaque session_id[32];
- *  opaque master[48];           // fixed length in the standard
- *  uint32 verify_result;
- *  opaque peer_cert<0..2^24-1>; // length 0 means no peer cert
- *  opaque ticket<0..2^24-1>;    // length 0 means no ticket
- *  uint32 ticket_lifetime;
- *  uint8 mfl_code;              // up to 255 according to standard
- *  uint8 trunc_hmac;            // 0 or 1
- *  uint8 encrypt_then_mac;      // 0 or 1
+ * #if defined(MBEDTLS_HAVE_TIME)
+ *     uint64 start_time;
+ * #endif
+ *     uint8 ciphersuite[2];        // defined by the standard
+ *     uint8 compression;           // 0 or 1
+ *     uint8 session_id_len;        // at most 32
+ *     opaque session_id[32];
+ *     opaque master[48];           // fixed length in the standard
+ *     uint32 verify_result;
+ * #if defined(MBEDTLS_X509_CRT_PARSE_C)
+ * #if defined(MBEDTLS_SSL_KEEP_PEER_CERTIFICATE)
+ *     opaque peer_cert<0..2^24-1>; // length 0 means no peer cert
+ * #else
+ *     uint8 peer_cert_digest_type;
+ *     opaque peer_cert_digest<0..2^8-1>
+ * #endif
+ * #endif
+ * #if defined(MBEDTLS_SSL_SESSION_TICKETS) && defined(MBEDTLS_SSL_CLI_C)
+ *     opaque ticket<0..2^24-1>;    // length 0 means no ticket
+ *     uint32 ticket_lifetime;
+ * #endif
+ * #if defined(MBEDTLS_SSL_MAX_FRAGMENT_LENGTH)
+ *     uint8 mfl_code;              // up to 255 according to standard
+ * #endif
+ * #if defined(MBEDTLS_SSL_TRUNCATED_HMAC)
+ *     uint8 trunc_hmac;            // 0 or 1
+ * #endif
+ * #if defined(MBEDTLS_SSL_ENCRYPT_THEN_MAC)
+ *     uint8 encrypt_then_mac;      // 0 or 1
+ * #endif
  *
  * The order is the same as in the definition of the structure, except
  * verify_result is put before peer_cert so that all mandatory fields come
@@ -6124,7 +6149,7 @@ void mbedtls_ssl_session_free(mbedtls_ssl_session *session)
          (SSL_SERIALIZED_CONTEXT_CONFIG_ALPN << SSL_SERIALIZED_CONTEXT_CONFIG_ALPN_BIT) | \
          0u))
 
-static unsigned char ssl_serialized_context_header[] = {
+static const unsigned char ssl_serialized_context_header[] = {
     MBEDTLS_VERSION_MAJOR,
     MBEDTLS_VERSION_MINOR,
     MBEDTLS_VERSION_PATCH,
@@ -6655,7 +6680,7 @@ static int ssl_context_load(mbedtls_ssl_context *ssl,
             /* alpn_chosen should point to an item in the configured list */
             for (cur = ssl->conf->alpn_list; *cur != NULL; cur++) {
                 if (strlen(*cur) == alpn_len &&
-                    memcmp(p, cur, alpn_len) == 0) {
+                    memcmp(p, *cur, alpn_len) == 0) {
                     ssl->alpn_chosen = *cur;
                     break;
                 }
@@ -6822,7 +6847,7 @@ void mbedtls_ssl_config_init(mbedtls_ssl_config *conf)
 }
 
 #if defined(MBEDTLS_KEY_EXCHANGE_WITH_CERT_ENABLED)
-static int ssl_preset_default_hashes[] = {
+static const int ssl_preset_default_hashes[] = {
 #if defined(MBEDTLS_SHA512_C)
     MBEDTLS_MD_SHA512,
 #endif
@@ -6840,14 +6865,14 @@ static int ssl_preset_default_hashes[] = {
 };
 #endif
 
-static int ssl_preset_suiteb_ciphersuites[] = {
+static const int ssl_preset_suiteb_ciphersuites[] = {
     MBEDTLS_TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,
     MBEDTLS_TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,
     0
 };
 
 #if defined(MBEDTLS_KEY_EXCHANGE_WITH_CERT_ENABLED)
-static int ssl_preset_suiteb_hashes[] = {
+static const int ssl_preset_suiteb_hashes[] = {
     MBEDTLS_MD_SHA256,
     MBEDTLS_MD_SHA384,
     MBEDTLS_MD_NONE
@@ -6855,7 +6880,7 @@ static int ssl_preset_suiteb_hashes[] = {
 #endif
 
 #if defined(MBEDTLS_ECP_C)
-static mbedtls_ecp_group_id ssl_preset_suiteb_curves[] = {
+static const mbedtls_ecp_group_id ssl_preset_suiteb_curves[] = {
 #if defined(MBEDTLS_ECP_DP_SECP256R1_ENABLED)
     MBEDTLS_ECP_DP_SECP256R1,
 #endif
diff --git a/thirdparty/mbedtls/library/timing.c b/thirdparty/mbedtls/library/timing.c
index 7ef9f473b5..ec2cff1084 100644
--- a/thirdparty/mbedtls/library/timing.c
+++ b/thirdparty/mbedtls/library/timing.c
@@ -403,6 +403,7 @@ int mbedtls_timing_self_test(int verbose)
     uint32_t a = 0, b = 0;
     mbedtls_timing_delay_context ctx;
 
+    memset(&ctx, 0, sizeof(ctx));
     if (verbose != 0) {
         mbedtls_printf("  TIMING tests note: will take some time!\n");
     }
diff --git a/thirdparty/mbedtls/library/version_features.c b/thirdparty/mbedtls/library/version_features.c
index 779325744b..6f663b12a7 100644
--- a/thirdparty/mbedtls/library/version_features.c
+++ b/thirdparty/mbedtls/library/version_features.c
@@ -456,6 +456,9 @@ static const char * const features[] = {
 #if defined(MBEDTLS_PSA_INJECT_ENTROPY)
     "MBEDTLS_PSA_INJECT_ENTROPY",
 #endif /* MBEDTLS_PSA_INJECT_ENTROPY */
+#if defined(MBEDTLS_PSA_ASSUME_EXCLUSIVE_BUFFERS)
+    "MBEDTLS_PSA_ASSUME_EXCLUSIVE_BUFFERS",
+#endif /* MBEDTLS_PSA_ASSUME_EXCLUSIVE_BUFFERS */
 #if defined(MBEDTLS_RSA_NO_CRT)
     "MBEDTLS_RSA_NO_CRT",
 #endif /* MBEDTLS_RSA_NO_CRT */
diff --git a/thirdparty/miniupnpc/src/miniupnpcstrings.h b/thirdparty/miniupnpc/src/miniupnpcstrings.h
index bd4dd317c1..f5730111af 100644
--- a/thirdparty/miniupnpc/src/miniupnpcstrings.h
+++ b/thirdparty/miniupnpc/src/miniupnpcstrings.h
@@ -2,7 +2,7 @@
 #define MINIUPNPCSTRINGS_H_INCLUDED
 
 #define OS_STRING "Godot Engine/1.0"
-#define MINIUPNPC_VERSION_STRING "2.2.6"
+#define MINIUPNPC_VERSION_STRING "2.2.7"
 
 #if 0
 /* according to "UPnP Device Architecture 1.0" */
diff --git a/thirdparty/thorvg/inc/config.h b/thirdparty/thorvg/inc/config.h
index 67716b66cc..31baceca67 100644
--- a/thirdparty/thorvg/inc/config.h
+++ b/thirdparty/thorvg/inc/config.h
@@ -10,5 +10,5 @@
 // For internal debugging:
 //#define THORVG_LOG_ENABLED
 
-#define THORVG_VERSION_STRING "0.12.9"
+#define THORVG_VERSION_STRING "0.12.10"
 #endif
diff --git a/thirdparty/thorvg/src/common/tvgMath.cpp b/thirdparty/thorvg/src/common/tvgMath.cpp
index 42bc2cf4aa..37a8879cb5 100644
--- a/thirdparty/thorvg/src/common/tvgMath.cpp
+++ b/thirdparty/thorvg/src/common/tvgMath.cpp
@@ -71,7 +71,7 @@ void mathRotate(Matrix* m, float degree)
 {
     if (degree == 0.0f) return;
 
-    auto radian = degree / 180.0f * M_PI;
+    auto radian = degree / 180.0f * MATH_PI;
     auto cosVal = cosf(radian);
     auto sinVal = sinf(radian);
 
diff --git a/thirdparty/thorvg/src/common/tvgMath.h b/thirdparty/thorvg/src/common/tvgMath.h
index 7f6708262b..32f4e6b7d1 100644
--- a/thirdparty/thorvg/src/common/tvgMath.h
+++ b/thirdparty/thorvg/src/common/tvgMath.h
@@ -70,7 +70,7 @@ static inline bool mathEqual(const Matrix& a, const Matrix& b)
 static inline bool mathRightAngle(const Matrix* m)
 {
    auto radian = fabsf(atan2f(m->e21, m->e11));
-   if (radian < FLT_EPSILON || mathEqual(radian, float(M_PI_2)) || mathEqual(radian, float(M_PI))) return true;
+   if (radian < FLT_EPSILON || mathEqual(radian, MATH_PI2) || mathEqual(radian, MATH_PI)) return true;
    return false;
 }
 
diff --git a/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp b/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp
index 7a4f544539..f2fbc07b4a 100644
--- a/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp
+++ b/thirdparty/thorvg/src/loaders/svg/tvgSvgLoader.cpp
@@ -743,10 +743,10 @@ static Matrix* _parseTransformationMatrix(const char* value)
             } else goto error;
         } else if (state == MatrixState::Rotate) {
             //Transform to signed.
-            points[0] = fmod(points[0], 360);
-            if (points[0] < 0) points[0] += 360;
-            auto c = cosf(points[0] * (M_PI / 180.0));
-            auto s = sinf(points[0] * (M_PI / 180.0));
+            points[0] = fmodf(points[0], 360.0f);
+            if (points[0] < 0) points[0] += 360.0f;
+            auto c = cosf(points[0] * (MATH_PI / 180.0f));
+            auto s = sinf(points[0] * (MATH_PI / 180.0f));
             if (ptCount == 1) {
                 Matrix tmp = { c, -s, 0, s, c, 0, 0, 0, 1 };
                 *matrix = mathMultiply(matrix, &tmp);
@@ -769,12 +769,12 @@ static Matrix* _parseTransformationMatrix(const char* value)
             *matrix = mathMultiply(matrix, &tmp);
         } else if (state == MatrixState::SkewX) {
             if (ptCount != 1) goto error;
-            auto deg = tanf(points[0] * (M_PI / 180.0));
+            auto deg = tanf(points[0] * (MATH_PI / 180.0f));
             Matrix tmp = { 1, deg, 0, 0, 1, 0, 0, 0, 1 };
             *matrix = mathMultiply(matrix, &tmp);
         } else if (state == MatrixState::SkewY) {
             if (ptCount != 1) goto error;
-            auto deg = tanf(points[0] * (M_PI / 180.0));
+            auto deg = tanf(points[0] * (MATH_PI / 180.0f));
             Matrix tmp = { 1, 0, 0, deg, 1, 0, 0, 0, 1 };
             *matrix = mathMultiply(matrix, &tmp);
         }
@@ -1919,6 +1919,19 @@ static SvgNode* _findNodeById(SvgNode *node, const char* id)
 }
 
 
+static SvgNode* _findParentById(SvgNode* node, char* id, SvgNode* doc)
+{
+    SvgNode *parent = node->parent;
+    while (parent != nullptr && parent != doc) {
+        if (parent->id && !strcmp(parent->id, id)) {
+            return parent;
+        }
+        parent = parent->parent;
+    }
+    return nullptr;
+}
+
+
 static constexpr struct
 {
     const char* tag;
@@ -1959,8 +1972,12 @@ static bool _attrParseUseNode(void* data, const char* key, const char* value)
         defs = _getDefsNode(node);
         nodeFrom = _findNodeById(defs, id);
         if (nodeFrom) {
-            _cloneNode(nodeFrom, node, 0);
-            if (nodeFrom->type == SvgNodeType::Symbol) use->symbol = nodeFrom;
+            if (!_findParentById(node, id, loader->doc)) {
+                _cloneNode(nodeFrom, node, 0);
+                if (nodeFrom->type == SvgNodeType::Symbol) use->symbol = nodeFrom;
+            } else {
+                TVGLOG("SVG", "%s is ancestor element. This reference is invalid.", id);
+            }
             free(id);
         } else {
             //some svg export software include <defs> element at the end of the file
@@ -2669,7 +2686,7 @@ static void _inheritGradient(SvgLoaderData* loader, SvgStyleGradient* to, SvgSty
         if (to->transform) memcpy(to->transform, from->transform, sizeof(Matrix));
     }
 
-    if (to->type == SvgGradientType::Linear && from->type == SvgGradientType::Linear) {
+    if (to->type == SvgGradientType::Linear) {
         for (unsigned int i = 0; i < sizeof(linear_tags) / sizeof(linear_tags[0]); i++) {
             bool coordSet = to->flags & linear_tags[i].flag;
             if (!(to->flags & linear_tags[i].flag) && (from->flags & linear_tags[i].flag)) {
@@ -2686,7 +2703,7 @@ static void _inheritGradient(SvgLoaderData* loader, SvgStyleGradient* to, SvgSty
                 linear_tags[i].tagInheritedRecalc(loader, to->linear, to->userSpace);
             }
         }
-    } else if (to->type == SvgGradientType::Radial && from->type == SvgGradientType::Radial) {
+    } else if (to->type == SvgGradientType::Radial) {
         for (unsigned int i = 0; i < sizeof(radialTags) / sizeof(radialTags[0]); i++) {
             bool coordSet = (to->flags & radialTags[i].flag);
             if (!(to->flags & radialTags[i].flag) && (from->flags & radialTags[i].flag)) {
@@ -2696,10 +2713,16 @@ static void _inheritGradient(SvgLoaderData* loader, SvgStyleGradient* to, SvgSty
             //GradUnits not set directly, coord set
             if (!gradUnitSet && coordSet) {
                 radialTags[i].tagRecalc(loader, to->radial, to->userSpace);
+                //If fx and fy are not set, set cx and cy.
+                if (!strcmp(radialTags[i].tag, "cx") && !(to->flags & SvgGradientFlags::Fx)) to->radial->fx = to->radial->cx;
+                if (!strcmp(radialTags[i].tag, "cy") && !(to->flags & SvgGradientFlags::Fy)) to->radial->fy = to->radial->cy;
             }
             //GradUnits set, coord not set directly
             if (to->userSpace == from->userSpace) continue;
             if (gradUnitSet && !coordSet) {
+                //If fx and fx are not set, do not call recalc.
+                if (!strcmp(radialTags[i].tag, "fx") && !(to->flags & SvgGradientFlags::Fx)) continue;
+                if (!strcmp(radialTags[i].tag, "fy") && !(to->flags & SvgGradientFlags::Fy)) continue;
                 radialTags[i].tagInheritedRecalc(loader, to->radial, to->userSpace);
             }
         }
@@ -3018,9 +3041,13 @@ static void _clonePostponedNodes(Array<SvgNodeIdPair>* cloneNodes, SvgNode* doc)
         auto defs = _getDefsNode(nodeIdPair.node);
         auto nodeFrom = _findNodeById(defs, nodeIdPair.id);
         if (!nodeFrom) nodeFrom = _findNodeById(doc, nodeIdPair.id);
-        _cloneNode(nodeFrom, nodeIdPair.node, 0);
-        if (nodeFrom && nodeFrom->type == SvgNodeType::Symbol && nodeIdPair.node->type == SvgNodeType::Use) {
-            nodeIdPair.node->node.use.symbol = nodeFrom;
+        if (!_findParentById(nodeIdPair.node, nodeIdPair.id, doc)) {
+            _cloneNode(nodeFrom, nodeIdPair.node, 0);
+            if (nodeFrom && nodeFrom->type == SvgNodeType::Symbol && nodeIdPair.node->type == SvgNodeType::Use) {
+                nodeIdPair.node->node.use.symbol = nodeFrom;
+            }
+        } else {
+            TVGLOG("SVG", "%s is ancestor element. This reference is invalid.", nodeIdPair.id);
         }
         free(nodeIdPair.id);
     }
diff --git a/thirdparty/thorvg/src/loaders/svg/tvgSvgPath.cpp b/thirdparty/thorvg/src/loaders/svg/tvgSvgPath.cpp
index f9780749a9..691cde1fc5 100644
--- a/thirdparty/thorvg/src/loaders/svg/tvgSvgPath.cpp
+++ b/thirdparty/thorvg/src/loaders/svg/tvgSvgPath.cpp
@@ -126,7 +126,7 @@ void _pathAppendArcTo(Array<PathCommand>* cmds, Array<Point>* pts, Point* cur, P
     rx = fabsf(rx);
     ry = fabsf(ry);
 
-    angle = angle * M_PI / 180.0f;
+    angle = angle * MATH_PI / 180.0f;
     cosPhi = cosf(angle);
     sinPhi = sinf(angle);
     dx2 = (sx - x) / 2.0f;
@@ -195,24 +195,24 @@ void _pathAppendArcTo(Array<PathCommand>* cmds, Array<Point>* pts, Point* cur, P
     //http://www.euclideanspace.com/maths/algebra/vectors/angleBetween/index.htm
     //Note: atan2 (0.0, 1.0) == 0.0
     at = atan2(((y1p - cyp) / ry), ((x1p - cxp) / rx));
-    theta1 = (at < 0.0f) ? 2.0f * M_PI + at : at;
+    theta1 = (at < 0.0f) ? 2.0f * MATH_PI + at : at;
 
     nat = atan2(((-y1p - cyp) / ry), ((-x1p - cxp) / rx));
-    deltaTheta = (nat < at) ? 2.0f * M_PI - at + nat : nat - at;
+    deltaTheta = (nat < at) ? 2.0f * MATH_PI - at + nat : nat - at;
 
     if (sweep) {
         //Ensure delta theta < 0 or else add 360 degrees
-        if (deltaTheta < 0.0f) deltaTheta += (float)(2.0f * M_PI);
+        if (deltaTheta < 0.0f) deltaTheta += 2.0f * MATH_PI;
     } else {
         //Ensure delta theta > 0 or else substract 360 degrees
-        if (deltaTheta > 0.0f) deltaTheta -= (float)(2.0f * M_PI);
+        if (deltaTheta > 0.0f) deltaTheta -= 2.0f * MATH_PI;
     }
 
     //Add several cubic bezier to approximate the arc
     //(smaller than 90 degrees)
     //We add one extra segment because we want something
     //Smaller than 90deg (i.e. not 90 itself)
-    segments = static_cast<int>(fabsf(deltaTheta / float(M_PI_2)) + 1.0f);
+    segments = static_cast<int>(fabsf(deltaTheta / MATH_PI2) + 1.0f);
     delta = deltaTheta / segments;
 
     //http://www.stillhq.com/ctpfaq/2001/comp.text.pdf-faq-2001-04.txt (section 2.13)
diff --git a/thirdparty/thorvg/src/renderer/sw_engine/tvgSwShape.cpp b/thirdparty/thorvg/src/renderer/sw_engine/tvgSwShape.cpp
index 03261a4b7f..5e79bb6ffa 100644
--- a/thirdparty/thorvg/src/renderer/sw_engine/tvgSwShape.cpp
+++ b/thirdparty/thorvg/src/renderer/sw_engine/tvgSwShape.cpp
@@ -28,6 +28,7 @@
 /* Internal Class Implementation                                        */
 /************************************************************************/
 
+
 struct Line
 {
     Point pt1;
@@ -55,23 +56,24 @@ static void _lineSplitAt(const Line& cur, float at, Line& left, Line& right)
 }
 
 
-static void _outlineEnd(SwOutline& outline)
+static bool _outlineEnd(SwOutline& outline)
 {
-    if (outline.pts.empty()) return;
+    //Make a contour if lineTo/curveTo without calling close/moveTo beforehand.
+    if (outline.pts.empty()) return false;
     outline.cntrs.push(outline.pts.count - 1);
     outline.closed.push(false);
+    return false;
 }
 
 
-static void _outlineMoveTo(SwOutline& outline, const Point* to, const Matrix* transform)
+static bool _outlineMoveTo(SwOutline& outline, const Point* to, const Matrix* transform, bool closed = false)
 {
-    if (outline.pts.count > 0) {
-        outline.cntrs.push(outline.pts.count - 1);
-        outline.closed.push(false);
-    }
+    //make it a contour, if the last contour is not closed yet.
+    if (!closed) _outlineEnd(outline);
 
     outline.pts.push(mathTransform(to, transform));
     outline.types.push(SW_CURVE_TYPE_POINT);
+    return false;
 }
 
 
@@ -95,20 +97,22 @@ static void _outlineCubicTo(SwOutline& outline, const Point* ctrl1, const Point*
 }
 
 
-static void _outlineClose(SwOutline& outline)
+static bool _outlineClose(SwOutline& outline)
 {
-    uint32_t i = 0;
-
+    uint32_t i;
     if (outline.cntrs.count > 0) i = outline.cntrs.last() + 1;
-    else i = 0;   //First Path
+    else i = 0;
 
     //Make sure there is at least one point in the current path
-    if (outline.pts.count == i) return;
+    if (outline.pts.count == i) return false;
 
     //Close the path
     outline.pts.push(outline.pts[i]);
+    outline.cntrs.push(outline.pts.count - 1);
     outline.types.push(SW_CURVE_TYPE_POINT);
     outline.closed.push(true);
+
+    return true;
 }
 
 
@@ -306,7 +310,7 @@ static SwOutline* _genDashOutline(const RenderShape* rshape, const Matrix* trans
         bool isOdd = dash.cnt % 2;
         if (isOdd) patternLength *= 2;
 
-        offset = fmod(offset, patternLength);
+        offset = fmodf(offset, patternLength);
         if (offset < 0) offset += patternLength;
 
         for (size_t i = 0; i < dash.cnt * (1 + (size_t)isOdd); ++i, ++offIdx) {
@@ -425,25 +429,28 @@ static bool _genOutline(SwShape* shape, const RenderShape* rshape, const Matrix*
 
     shape->outline = mpoolReqOutline(mpool, tid);
     auto outline = shape->outline;
+    bool closed = false;
 
     //Generate Outlines
     while (cmdCnt-- > 0) {
         switch (*cmds) {
             case PathCommand::Close: {
-                _outlineClose(*outline);
+                if (!closed) closed = _outlineClose(*outline);
                 break;
             }
             case PathCommand::MoveTo: {
-                _outlineMoveTo(*outline, pts, transform);
+                closed = _outlineMoveTo(*outline, pts, transform, closed);
                 ++pts;
                 break;
             }
             case PathCommand::LineTo: {
+                if (closed) closed = _outlineEnd(*outline);
                 _outlineLineTo(*outline, pts, transform);
                 ++pts;
                 break;
             }
             case PathCommand::CubicTo: {
+                if (closed) closed = _outlineEnd(*outline);
                 _outlineCubicTo(*outline, pts, pts + 1, pts + 2, transform);
                 pts += 3;
                 break;
@@ -452,7 +459,7 @@ static bool _genOutline(SwShape* shape, const RenderShape* rshape, const Matrix*
         ++cmds;
     }
 
-    _outlineEnd(*outline);
+    if (!closed) _outlineEnd(*outline);
 
     outline->fillRule = rshape->rule;
     shape->outline = outline;
diff --git a/thirdparty/thorvg/src/renderer/tvgPaint.cpp b/thirdparty/thorvg/src/renderer/tvgPaint.cpp
index 13ec4183d6..227ce10a0d 100644
--- a/thirdparty/thorvg/src/renderer/tvgPaint.cpp
+++ b/thirdparty/thorvg/src/renderer/tvgPaint.cpp
@@ -41,20 +41,26 @@
     }
 
 
-static bool _compFastTrack(Paint* cmpTarget, const RenderTransform* pTransform, RenderTransform* rTransform, RenderRegion& viewport)
+
+static Result _compFastTrack(Paint* cmpTarget, const RenderTransform* pTransform, RenderTransform* rTransform, RenderRegion& viewport)
 {
     /* Access Shape class by Paint is bad... but it's ok still it's an internal usage. */
     auto shape = static_cast<Shape*>(cmpTarget);
 
     //Rectangle Candidates?
     const Point* pts;
-     if (shape->pathCoords(&pts) != 4) return false;
+    auto ptsCnt = shape->pathCoords(&pts);
+
+    //nothing to clip
+    if (ptsCnt == 0) return Result::InvalidArguments;
+
+    if (ptsCnt != 4) return Result::InsufficientCondition;
 
     if (rTransform) rTransform->update();
 
     //No rotation and no skewing
-    if (pTransform && (!mathRightAngle(&pTransform->m) || mathSkewed(&pTransform->m))) return false;
-    if (rTransform && (!mathRightAngle(&rTransform->m) || mathSkewed(&rTransform->m))) return false;
+    if (pTransform && (!mathRightAngle(&pTransform->m) || mathSkewed(&pTransform->m))) return Result::InsufficientCondition;
+    if (rTransform && (!mathRightAngle(&rTransform->m) || mathSkewed(&rTransform->m))) return Result::InsufficientCondition;
 
     //Perpendicular Rectangle?
     auto pt1 = pts + 0;
@@ -99,10 +105,9 @@ static bool _compFastTrack(Paint* cmpTarget, const RenderTransform* pTransform,
         if (viewport.w < 0) viewport.w = 0;
         if (viewport.h < 0) viewport.h = 0;
 
-        return true;
+        return Result::Success;
     }
-
-    return false;
+    return Result::InsufficientCondition;
 }
 
 
@@ -235,7 +240,7 @@ RenderData Paint::Impl::update(RenderMethod* renderer, const RenderTransform* pT
     /* 1. Composition Pre Processing */
     RenderData trd = nullptr;                 //composite target render data
     RenderRegion viewport;
-    bool compFastTrack = false;
+    Result compFastTrack = Result::InsufficientCondition;
     bool childClipper = false;
 
     if (compData) {
@@ -260,7 +265,7 @@ RenderData Paint::Impl::update(RenderMethod* renderer, const RenderTransform* pT
             }
             if (tryFastTrack) {
                 RenderRegion viewport2;
-                if ((compFastTrack = _compFastTrack(target, pTransform, target->pImpl->rTransform, viewport2))) {
+                if ((compFastTrack = _compFastTrack(target, pTransform, target->pImpl->rTransform, viewport2)) == Result::Success) {
                     viewport = renderer->viewport();
                     viewport2.intersect(viewport);
                     renderer->viewport(viewport2);
@@ -268,7 +273,7 @@ RenderData Paint::Impl::update(RenderMethod* renderer, const RenderTransform* pT
                 }
             }
         }
-        if (!compFastTrack) {
+        if (compFastTrack == Result::InsufficientCondition) {
             childClipper = compData->method == CompositeMethod::ClipPath ? true : false;
             trd = target->pImpl->update(renderer, pTransform, clips, 255, pFlag, childClipper);
             if (childClipper) clips.push(trd);
@@ -285,7 +290,7 @@ RenderData Paint::Impl::update(RenderMethod* renderer, const RenderTransform* pT
     PAINT_METHOD(rd, update(renderer, &outTransform, clips, opacity, newFlag, clipper));
 
     /* 3. Composition Post Processing */
-    if (compFastTrack) renderer->viewport(viewport);
+    if (compFastTrack == Result::Success) renderer->viewport(viewport);
     else if (childClipper) clips.pop();
 
     return rd;
diff --git a/thirdparty/thorvg/src/renderer/tvgShape.cpp b/thirdparty/thorvg/src/renderer/tvgShape.cpp
index ab1f378b47..a42060e241 100644
--- a/thirdparty/thorvg/src/renderer/tvgShape.cpp
+++ b/thirdparty/thorvg/src/renderer/tvgShape.cpp
@@ -164,7 +164,7 @@ Result Shape::appendArc(float cx, float cy, float radius, float startAngle, floa
     }
 
     for (int i = 0; i < nCurves; ++i) {
-        auto endAngle = startAngle + ((i != nCurves - 1) ? float(M_PI_2) * sweepSign : fract);
+        auto endAngle = startAngle + ((i != nCurves - 1) ? MATH_PI2 * sweepSign : fract);
         Point end = {radius * cosf(endAngle), radius * sinf(endAngle)};
 
         //variables needed to calculate bezier control points
diff --git a/thirdparty/thorvg/update-thorvg.sh b/thirdparty/thorvg/update-thorvg.sh
index 7a754c09b9..afe05e629e 100755
--- a/thirdparty/thorvg/update-thorvg.sh
+++ b/thirdparty/thorvg/update-thorvg.sh
@@ -1,6 +1,6 @@
 #!/bin/bash -e
 
-VERSION=0.12.9
+VERSION=0.12.10
 
 cd thirdparty/thorvg/ || true
 rm -rf AUTHORS LICENSE inc/ src/ *.zip *.tar.gz tmp/
diff --git a/thirdparty/zstd/common/allocations.h b/thirdparty/zstd/common/allocations.h
index a3153c4bac..5e89955010 100644
--- a/thirdparty/zstd/common/allocations.h
+++ b/thirdparty/zstd/common/allocations.h
@@ -14,7 +14,7 @@
 #define ZSTD_DEPS_NEED_MALLOC
 #include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
 
-#include "mem.h" /* MEM_STATIC */
+#include "compiler.h" /* MEM_STATIC */
 #define ZSTD_STATIC_LINKING_ONLY
 #include "../zstd.h" /* ZSTD_customMem */
 
diff --git a/thirdparty/zstd/common/bitstream.h b/thirdparty/zstd/common/bitstream.h
index 72b0b3df22..676044989c 100644
--- a/thirdparty/zstd/common/bitstream.h
+++ b/thirdparty/zstd/common/bitstream.h
@@ -90,19 +90,20 @@ MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
 /*-********************************************
 *  bitStream decoding API (read backward)
 **********************************************/
+typedef size_t BitContainerType;
 typedef struct {
-    size_t   bitContainer;
+    BitContainerType bitContainer;
     unsigned bitsConsumed;
     const char* ptr;
     const char* start;
     const char* limitPtr;
 } BIT_DStream_t;
 
-typedef enum { BIT_DStream_unfinished = 0,
-               BIT_DStream_endOfBuffer = 1,
-               BIT_DStream_completed = 2,
-               BIT_DStream_overflow = 3 } BIT_DStream_status;  /* result of BIT_reloadDStream() */
-               /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
+typedef enum { BIT_DStream_unfinished = 0,  /* fully refilled */
+               BIT_DStream_endOfBuffer = 1, /* still some bits left in bitstream */
+               BIT_DStream_completed = 2,   /* bitstream entirely consumed, bit-exact */
+               BIT_DStream_overflow = 3     /* user requested more bits than present in bitstream */
+    } BIT_DStream_status;  /* result of BIT_reloadDStream() */
 
 MEM_STATIC size_t   BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
 MEM_STATIC size_t   BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
@@ -112,7 +113,7 @@ MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
 
 /* Start by invoking BIT_initDStream().
 *  A chunk of the bitStream is then stored into a local register.
-*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
+*  Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (BitContainerType).
 *  You can then retrieve bitFields stored into the local register, **in reverse order**.
 *  Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
 *  A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
@@ -162,7 +163,7 @@ MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
     return 0;
 }
 
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
+FORCE_INLINE_TEMPLATE size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
 {
 #if defined(STATIC_BMI2) && STATIC_BMI2 == 1 && !defined(ZSTD_NO_INTRINSICS)
     return  _bzhi_u64(bitContainer, nbBits);
@@ -267,22 +268,22 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
         bitD->bitContainer = *(const BYTE*)(bitD->start);
         switch(srcSize)
         {
-        case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
+        case 7: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
                 ZSTD_FALLTHROUGH;
 
-        case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
+        case 6: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
                 ZSTD_FALLTHROUGH;
 
-        case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
+        case 5: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
                 ZSTD_FALLTHROUGH;
 
-        case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
+        case 4: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[3]) << 24;
                 ZSTD_FALLTHROUGH;
 
-        case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
+        case 3: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[2]) << 16;
                 ZSTD_FALLTHROUGH;
 
-        case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) <<  8;
+        case 2: bitD->bitContainer += (BitContainerType)(((const BYTE*)(srcBuffer))[1]) <<  8;
                 ZSTD_FALLTHROUGH;
 
         default: break;
@@ -297,12 +298,12 @@ MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, si
     return srcSize;
 }
 
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
+FORCE_INLINE_TEMPLATE size_t BIT_getUpperBits(BitContainerType bitContainer, U32 const start)
 {
     return bitContainer >> start;
 }
 
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
+FORCE_INLINE_TEMPLATE size_t BIT_getMiddleBits(BitContainerType bitContainer, U32 const start, U32 const nbBits)
 {
     U32 const regMask = sizeof(bitContainer)*8 - 1;
     /* if start > regMask, bitstream is corrupted, and result is undefined */
@@ -325,7 +326,7 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
  *  On 32-bits, maxNbBits==24.
  *  On 64-bits, maxNbBits==56.
  * @return : value extracted */
-MEM_STATIC  FORCE_INLINE_ATTR size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
+FORCE_INLINE_TEMPLATE size_t BIT_lookBits(const BIT_DStream_t*  bitD, U32 nbBits)
 {
     /* arbitrate between double-shift and shift+mask */
 #if 1
@@ -348,7 +349,7 @@ MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
     return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
 }
 
-MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
+FORCE_INLINE_TEMPLATE void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
 {
     bitD->bitsConsumed += nbBits;
 }
@@ -357,7 +358,7 @@ MEM_STATIC FORCE_INLINE_ATTR void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
  *  Read (consume) next n bits from local register and update.
  *  Pay attention to not read more than nbBits contained into local register.
  * @return : extracted value. */
-MEM_STATIC FORCE_INLINE_ATTR size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
+FORCE_INLINE_TEMPLATE size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
 {
     size_t const value = BIT_lookBits(bitD, nbBits);
     BIT_skipBits(bitD, nbBits);
@@ -374,6 +375,21 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
     return value;
 }
 
+/*! BIT_reloadDStream_internal() :
+ *  Simple variant of BIT_reloadDStream(), with two conditions:
+ *  1. bitstream is valid : bitsConsumed <= sizeof(bitD->bitContainer)*8
+ *  2. look window is valid after shifted down : bitD->ptr >= bitD->start
+ */
+MEM_STATIC BIT_DStream_status BIT_reloadDStream_internal(BIT_DStream_t* bitD)
+{
+    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
+    bitD->ptr -= bitD->bitsConsumed >> 3;
+    assert(bitD->ptr >= bitD->start);
+    bitD->bitsConsumed &= 7;
+    bitD->bitContainer = MEM_readLEST(bitD->ptr);
+    return BIT_DStream_unfinished;
+}
+
 /*! BIT_reloadDStreamFast() :
  *  Similar to BIT_reloadDStream(), but with two differences:
  *  1. bitsConsumed <= sizeof(bitD->bitContainer)*8 must hold!
@@ -384,31 +400,35 @@ MEM_STATIC BIT_DStream_status BIT_reloadDStreamFast(BIT_DStream_t* bitD)
 {
     if (UNLIKELY(bitD->ptr < bitD->limitPtr))
         return BIT_DStream_overflow;
-    assert(bitD->bitsConsumed <= sizeof(bitD->bitContainer)*8);
-    bitD->ptr -= bitD->bitsConsumed >> 3;
-    bitD->bitsConsumed &= 7;
-    bitD->bitContainer = MEM_readLEST(bitD->ptr);
-    return BIT_DStream_unfinished;
+    return BIT_reloadDStream_internal(bitD);
 }
 
 /*! BIT_reloadDStream() :
  *  Refill `bitD` from buffer previously set in BIT_initDStream() .
- *  This function is safe, it guarantees it will not read beyond src buffer.
+ *  This function is safe, it guarantees it will not never beyond src buffer.
  * @return : status of `BIT_DStream_t` internal register.
  *           when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
-MEM_STATIC FORCE_INLINE_ATTR BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
+FORCE_INLINE_TEMPLATE BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
 {
-    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* overflow detected, like end of stream */
+    /* note : once in overflow mode, a bitstream remains in this mode until it's reset */
+    if (UNLIKELY(bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))) {
+        static const BitContainerType zeroFilled = 0;
+        bitD->ptr = (const char*)&zeroFilled; /* aliasing is allowed for char */
+        /* overflow detected, erroneous scenario or end of stream: no update */
         return BIT_DStream_overflow;
+    }
+
+    assert(bitD->ptr >= bitD->start);
 
     if (bitD->ptr >= bitD->limitPtr) {
-        return BIT_reloadDStreamFast(bitD);
+        return BIT_reloadDStream_internal(bitD);
     }
     if (bitD->ptr == bitD->start) {
+        /* reached end of bitStream => no update */
         if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
         return BIT_DStream_completed;
     }
-    /* start < ptr < limitPtr */
+    /* start < ptr < limitPtr => cautious update */
     {   U32 nbBytes = bitD->bitsConsumed >> 3;
         BIT_DStream_status result = BIT_DStream_unfinished;
         if (bitD->ptr - nbBytes < bitD->start) {
diff --git a/thirdparty/zstd/common/compiler.h b/thirdparty/zstd/common/compiler.h
index 73f8d01998..31880ecbe1 100644
--- a/thirdparty/zstd/common/compiler.h
+++ b/thirdparty/zstd/common/compiler.h
@@ -11,6 +11,8 @@
 #ifndef ZSTD_COMPILER_H
 #define ZSTD_COMPILER_H
 
+#include <stddef.h>
+
 #include "portability_macros.h"
 
 /*-*******************************************************
@@ -51,12 +53,19 @@
 #  define WIN_CDECL
 #endif
 
+/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+#if defined(__GNUC__)
+#  define UNUSED_ATTR __attribute__((unused))
+#else
+#  define UNUSED_ATTR
+#endif
+
 /**
  * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
  * parameters. They must be inlined for the compiler to eliminate the constant
  * branches.
  */
-#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR UNUSED_ATTR
 /**
  * HINT_INLINE is used to help the compiler generate better code. It is *not*
  * used for "templates", so it can be tweaked based on the compilers
@@ -71,14 +80,28 @@
 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
 #  define HINT_INLINE static INLINE_KEYWORD
 #else
-#  define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
+#  define HINT_INLINE FORCE_INLINE_TEMPLATE
 #endif
 
-/* UNUSED_ATTR tells the compiler it is okay if the function is unused. */
+/* "soft" inline :
+ * The compiler is free to select if it's a good idea to inline or not.
+ * The main objective is to silence compiler warnings
+ * when a defined function in included but not used.
+ *
+ * Note : this macro is prefixed `MEM_` because it used to be provided by `mem.h` unit.
+ * Updating the prefix is probably preferable, but requires a fairly large codemod,
+ * since this name is used everywhere.
+ */
+#ifndef MEM_STATIC  /* already defined in Linux Kernel mem.h */
 #if defined(__GNUC__)
-#  define UNUSED_ATTR __attribute__((unused))
+#  define MEM_STATIC static __inline UNUSED_ATTR
+#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+#  define MEM_STATIC static inline
+#elif defined(_MSC_VER)
+#  define MEM_STATIC static __inline
 #else
-#  define UNUSED_ATTR
+#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
+#endif
 #endif
 
 /* force no inlining */
@@ -109,10 +132,10 @@
 /* prefetch
  * can be disabled, by declaring NO_PREFETCH build macro */
 #if defined(NO_PREFETCH)
-#  define PREFETCH_L1(ptr)  (void)(ptr)  /* disabled */
-#  define PREFETCH_L2(ptr)  (void)(ptr)  /* disabled */
+#  define PREFETCH_L1(ptr)  do { (void)(ptr); } while (0)  /* disabled */
+#  define PREFETCH_L2(ptr)  do { (void)(ptr); } while (0)  /* disabled */
 #else
-#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86))  /* _mm_prefetch() is not defined outside of x86/x64 */
+#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) && !defined(_M_ARM64EC)  /* _mm_prefetch() is not defined outside of x86/x64 */
 #    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
 #    define PREFETCH_L1(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
 #    define PREFETCH_L2(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
@@ -120,24 +143,25 @@
 #    define PREFETCH_L1(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
 #    define PREFETCH_L2(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
 #  elif defined(__aarch64__)
-#    define PREFETCH_L1(ptr)  __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr)))
-#    define PREFETCH_L2(ptr)  __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr)))
+#    define PREFETCH_L1(ptr)  do { __asm__ __volatile__("prfm pldl1keep, %0" ::"Q"(*(ptr))); } while (0)
+#    define PREFETCH_L2(ptr)  do { __asm__ __volatile__("prfm pldl2keep, %0" ::"Q"(*(ptr))); } while (0)
 #  else
-#    define PREFETCH_L1(ptr) (void)(ptr)  /* disabled */
-#    define PREFETCH_L2(ptr) (void)(ptr)  /* disabled */
+#    define PREFETCH_L1(ptr) do { (void)(ptr); } while (0)  /* disabled */
+#    define PREFETCH_L2(ptr) do { (void)(ptr); } while (0)  /* disabled */
 #  endif
 #endif  /* NO_PREFETCH */
 
 #define CACHELINE_SIZE 64
 
-#define PREFETCH_AREA(p, s)  {            \
-    const char* const _ptr = (const char*)(p);  \
-    size_t const _size = (size_t)(s);     \
-    size_t _pos;                          \
-    for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) {  \
-        PREFETCH_L2(_ptr + _pos);         \
-    }                                     \
-}
+#define PREFETCH_AREA(p, s)                              \
+    do {                                                 \
+        const char* const _ptr = (const char*)(p);       \
+        size_t const _size = (size_t)(s);                \
+        size_t _pos;                                     \
+        for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
+            PREFETCH_L2(_ptr + _pos);                    \
+        }                                                \
+    } while (0)
 
 /* vectorization
  * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
@@ -166,9 +190,9 @@
 #endif
 
 #if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
-#  define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
+#  define ZSTD_UNREACHABLE do { assert(0), __builtin_unreachable(); } while (0)
 #else
-#  define ZSTD_UNREACHABLE { assert(0); }
+#  define ZSTD_UNREACHABLE do { assert(0); } while (0)
 #endif
 
 /* disable warnings */
@@ -281,6 +305,74 @@
 *  Sanitizer
 *****************************************************************/
 
+/**
+ * Zstd relies on pointer overflow in its decompressor.
+ * We add this attribute to functions that rely on pointer overflow.
+ */
+#ifndef ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+#  if __has_attribute(no_sanitize)
+#    if !defined(__clang__) && defined(__GNUC__) && __GNUC__ < 8
+       /* gcc < 8 only has signed-integer-overlow which triggers on pointer overflow */
+#      define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("signed-integer-overflow")))
+#    else
+       /* older versions of clang [3.7, 5.0) will warn that pointer-overflow is ignored. */
+#      define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR __attribute__((no_sanitize("pointer-overflow")))
+#    endif
+#  else
+#    define ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+#  endif
+#endif
+
+/**
+ * Helper function to perform a wrapped pointer difference without trigging
+ * UBSAN.
+ *
+ * @returns lhs - rhs with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+ptrdiff_t ZSTD_wrappedPtrDiff(unsigned char const* lhs, unsigned char const* rhs)
+{
+    return lhs - rhs;
+}
+
+/**
+ * Helper function to perform a wrapped pointer add without triggering UBSAN.
+ *
+ * @return ptr + add with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+unsigned char const* ZSTD_wrappedPtrAdd(unsigned char const* ptr, ptrdiff_t add)
+{
+    return ptr + add;
+}
+
+/**
+ * Helper function to perform a wrapped pointer subtraction without triggering
+ * UBSAN.
+ *
+ * @return ptr - sub with wrapping
+ */
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+unsigned char const* ZSTD_wrappedPtrSub(unsigned char const* ptr, ptrdiff_t sub)
+{
+    return ptr - sub;
+}
+
+/**
+ * Helper function to add to a pointer that works around C's undefined behavior
+ * of adding 0 to NULL.
+ *
+ * @returns `ptr + add` except it defines `NULL + 0 == NULL`.
+ */
+MEM_STATIC
+unsigned char* ZSTD_maybeNullPtrAdd(unsigned char* ptr, ptrdiff_t add)
+{
+    return add > 0 ? ptr + add : ptr;
+}
+
 /* Issue #3240 reports an ASAN failure on an llvm-mingw build. Out of an
  * abundance of caution, disable our custom poisoning on mingw. */
 #ifdef __MINGW32__
diff --git a/thirdparty/zstd/common/cpu.h b/thirdparty/zstd/common/cpu.h
index 8bc34a36da..0e684d9ad8 100644
--- a/thirdparty/zstd/common/cpu.h
+++ b/thirdparty/zstd/common/cpu.h
@@ -35,6 +35,7 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
     U32 f7b = 0;
     U32 f7c = 0;
 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
+#if !defined(__clang__)
     int reg[4];
     __cpuid((int*)reg, 0);
     {
@@ -50,6 +51,41 @@ MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
             f7c = (U32)reg[2];
         }
     }
+#else
+    /* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
+     * which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
+     * to due to being a reserved register. So in that case, do the `cpuid`
+     * ourselves. Clang supports inline assembly anyway.
+     */
+    U32 n;
+    __asm__(
+        "pushq %%rbx\n\t"
+        "cpuid\n\t"
+        "popq %%rbx\n\t"
+        : "=a"(n)
+        : "a"(0)
+        : "rcx", "rdx");
+    if (n >= 1) {
+      U32 f1a;
+      __asm__(
+          "pushq %%rbx\n\t"
+          "cpuid\n\t"
+          "popq %%rbx\n\t"
+          : "=a"(f1a), "=c"(f1c), "=d"(f1d)
+          : "a"(1)
+          :);
+    }
+    if (n >= 7) {
+      __asm__(
+          "pushq %%rbx\n\t"
+          "cpuid\n\t"
+          "movq %%rbx, %%rax\n\t"
+          "popq %%rbx"
+          : "=a"(f7b), "=c"(f7c)
+          : "a"(7), "c"(0)
+          : "rdx");
+    }
+#endif
 #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
     /* The following block like the normal cpuid branch below, but gcc
      * reserves ebx for use of its pic register so we must specially
diff --git a/thirdparty/zstd/common/debug.c b/thirdparty/zstd/common/debug.c
index ebf7bfccfa..9d0b7d229c 100644
--- a/thirdparty/zstd/common/debug.c
+++ b/thirdparty/zstd/common/debug.c
@@ -21,4 +21,10 @@
 
 #include "debug.h"
 
+#if !defined(ZSTD_LINUX_KERNEL) || (DEBUGLEVEL>=2)
+/* We only use this when DEBUGLEVEL>=2, but we get -Werror=pedantic errors if a
+ * translation unit is empty. So remove this from Linux kernel builds, but
+ * otherwise just leave it in.
+ */
 int g_debuglevel = DEBUGLEVEL;
+#endif
diff --git a/thirdparty/zstd/common/debug.h b/thirdparty/zstd/common/debug.h
index 0e9817ea6d..a16b69e574 100644
--- a/thirdparty/zstd/common/debug.h
+++ b/thirdparty/zstd/common/debug.h
@@ -85,18 +85,27 @@ extern int g_debuglevel; /* the variable is only declared,
                             It's useful when enabling very verbose levels
                             on selective conditions (such as position in src) */
 
-#  define RAWLOG(l, ...) {                                       \
-                if (l<=g_debuglevel) {                           \
-                    ZSTD_DEBUG_PRINT(__VA_ARGS__);               \
-            }   }
-#  define DEBUGLOG(l, ...) {                                     \
-                if (l<=g_debuglevel) {                           \
-                    ZSTD_DEBUG_PRINT(__FILE__ ": " __VA_ARGS__); \
-                    ZSTD_DEBUG_PRINT(" \n");                     \
-            }   }
+#  define RAWLOG(l, ...)                   \
+    do {                                   \
+        if (l<=g_debuglevel) {             \
+            ZSTD_DEBUG_PRINT(__VA_ARGS__); \
+        }                                  \
+    } while (0)
+
+#define STRINGIFY(x) #x
+#define TOSTRING(x) STRINGIFY(x)
+#define LINE_AS_STRING TOSTRING(__LINE__)
+
+#  define DEBUGLOG(l, ...)                               \
+    do {                                                 \
+        if (l<=g_debuglevel) {                           \
+            ZSTD_DEBUG_PRINT(__FILE__ ":" LINE_AS_STRING ": " __VA_ARGS__); \
+            ZSTD_DEBUG_PRINT(" \n");                     \
+        }                                                \
+    } while (0)
 #else
-#  define RAWLOG(l, ...)      {}    /* disabled */
-#  define DEBUGLOG(l, ...)    {}    /* disabled */
+#  define RAWLOG(l, ...)   do { } while (0)    /* disabled */
+#  define DEBUGLOG(l, ...) do { } while (0)    /* disabled */
 #endif
 
 
diff --git a/thirdparty/zstd/common/error_private.h b/thirdparty/zstd/common/error_private.h
index 325daad404..0156010c74 100644
--- a/thirdparty/zstd/common/error_private.h
+++ b/thirdparty/zstd/common/error_private.h
@@ -60,8 +60,13 @@ ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
 ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
 
 /* check and forward error code */
-#define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
-#define CHECK_F(f)   { CHECK_V_F(_var_err__, f); }
+#define CHECK_V_F(e, f)     \
+    size_t const e = f;     \
+    do {                    \
+        if (ERR_isError(e)) \
+            return e;       \
+    } while (0)
+#define CHECK_F(f)   do { CHECK_V_F(_var_err__, f); } while (0)
 
 
 /*-****************************************
@@ -95,10 +100,12 @@ void _force_has_format_string(const char *format, ...) {
  * We want to force this function invocation to be syntactically correct, but
  * we don't want to force runtime evaluation of its arguments.
  */
-#define _FORCE_HAS_FORMAT_STRING(...) \
-  if (0) { \
-    _force_has_format_string(__VA_ARGS__); \
-  }
+#define _FORCE_HAS_FORMAT_STRING(...)              \
+    do {                                           \
+        if (0) {                                   \
+            _force_has_format_string(__VA_ARGS__); \
+        }                                          \
+    } while (0)
 
 #define ERR_QUOTE(str) #str
 
@@ -109,48 +116,50 @@ void _force_has_format_string(const char *format, ...) {
  * In order to do that (particularly, printing the conditional that failed),
  * this can't just wrap RETURN_ERROR().
  */
-#define RETURN_ERROR_IF(cond, err, ...) \
-  if (cond) { \
-    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
-           __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  }
+#define RETURN_ERROR_IF(cond, err, ...)                                        \
+    do {                                                                       \
+        if (cond) {                                                            \
+            RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s",          \
+                  __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
+            _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                             \
+            RAWLOG(3, ": " __VA_ARGS__);                                       \
+            RAWLOG(3, "\n");                                                   \
+            return ERROR(err);                                                 \
+        }                                                                      \
+    } while (0)
 
 /**
  * Unconditionally return the specified error.
  *
  * In debug modes, prints additional information.
  */
-#define RETURN_ERROR(err, ...) \
-  do { \
-    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
-           __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  } while(0);
+#define RETURN_ERROR(err, ...)                                               \
+    do {                                                                     \
+        RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+              __FILE__, __LINE__, ERR_QUOTE(ERROR(err)));                    \
+        _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                               \
+        RAWLOG(3, ": " __VA_ARGS__);                                         \
+        RAWLOG(3, "\n");                                                     \
+        return ERROR(err);                                                   \
+    } while(0)
 
 /**
  * If the provided expression evaluates to an error code, returns that error code.
  *
  * In debug modes, prints additional information.
  */
-#define FORWARD_IF_ERROR(err, ...) \
-  do { \
-    size_t const err_code = (err); \
-    if (ERR_isError(err_code)) { \
-      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
-             __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
-      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-      RAWLOG(3, ": " __VA_ARGS__); \
-      RAWLOG(3, "\n"); \
-      return err_code; \
-    } \
-  } while(0);
+#define FORWARD_IF_ERROR(err, ...)                                                 \
+    do {                                                                           \
+        size_t const err_code = (err);                                             \
+        if (ERR_isError(err_code)) {                                               \
+            RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s",                 \
+                  __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
+            _FORCE_HAS_FORMAT_STRING(__VA_ARGS__);                                 \
+            RAWLOG(3, ": " __VA_ARGS__);                                           \
+            RAWLOG(3, "\n");                                                       \
+            return err_code;                                                       \
+        }                                                                          \
+    } while(0)
 
 #if defined (__cplusplus)
 }
diff --git a/thirdparty/zstd/common/fse.h b/thirdparty/zstd/common/fse.h
index 02a1f0bc53..2ae128e60d 100644
--- a/thirdparty/zstd/common/fse.h
+++ b/thirdparty/zstd/common/fse.h
@@ -229,6 +229,7 @@ If there is an error, the function will return an error code, which can be teste
 
 #endif  /* FSE_H */
 
+
 #if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
 #define FSE_H_FSE_STATIC_LINKING_ONLY
 
@@ -464,13 +465,13 @@ MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, un
     FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
     const U16* const stateTable = (const U16*)(statePtr->stateTable);
     U32 const nbBitsOut  = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
-    BIT_addBits(bitC, statePtr->value, nbBitsOut);
+    BIT_addBits(bitC,  (size_t)statePtr->value, nbBitsOut);
     statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
 }
 
 MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
 {
-    BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
+    BIT_addBits(bitC, (size_t)statePtr->value, statePtr->stateLog);
     BIT_flushBits(bitC);
 }
 
diff --git a/thirdparty/zstd/common/fse_decompress.c b/thirdparty/zstd/common/fse_decompress.c
index 1e1c9f92d6..0dcc4640d0 100644
--- a/thirdparty/zstd/common/fse_decompress.c
+++ b/thirdparty/zstd/common/fse_decompress.c
@@ -22,8 +22,7 @@
 #define FSE_STATIC_LINKING_ONLY
 #include "fse.h"
 #include "error_private.h"
-#define ZSTD_DEPS_NEED_MALLOC
-#include "zstd_deps.h"
+#include "zstd_deps.h"  /* ZSTD_memcpy */
 #include "bits.h"       /* ZSTD_highbit32 */
 
 
@@ -84,7 +83,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
                     symbolNext[s] = 1;
                 } else {
                     if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
-                    symbolNext[s] = normalizedCounter[s];
+                    symbolNext[s] = (U16)normalizedCounter[s];
         }   }   }
         ZSTD_memcpy(dt, &DTableH, sizeof(DTableH));
     }
@@ -99,8 +98,7 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
          * all symbols have counts <= 8. We ensure we have 8 bytes at the end of
          * our buffer to handle the over-write.
          */
-        {
-            U64 const add = 0x0101010101010101ull;
+        {   U64 const add = 0x0101010101010101ull;
             size_t pos = 0;
             U64 sv = 0;
             U32 s;
@@ -111,9 +109,8 @@ static size_t FSE_buildDTable_internal(FSE_DTable* dt, const short* normalizedCo
                 for (i = 8; i < n; i += 8) {
                     MEM_write64(spread + pos + i, sv);
                 }
-                pos += n;
-            }
-        }
+                pos += (size_t)n;
+        }   }
         /* Now we spread those positions across the table.
          * The benefit of doing it in two stages is that we avoid the
          * variable size inner loop, which caused lots of branch misses.
@@ -232,12 +229,12 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_usingDTable_generic(
             break;
     }   }
 
-    return op-ostart;
+    assert(op >= ostart);
+    return (size_t)(op-ostart);
 }
 
 typedef struct {
     short ncount[FSE_MAX_SYMBOL_VALUE + 1];
-    FSE_DTable dtable[1]; /* Dynamically sized */
 } FSE_DecompressWksp;
 
 
@@ -252,13 +249,18 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
     unsigned tableLog;
     unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
     FSE_DecompressWksp* const wksp = (FSE_DecompressWksp*)workSpace;
+    size_t const dtablePos = sizeof(FSE_DecompressWksp) / sizeof(FSE_DTable);
+    FSE_DTable* const dtable = (FSE_DTable*)workSpace + dtablePos;
 
-    DEBUG_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
+    FSE_STATIC_ASSERT((FSE_MAX_SYMBOL_VALUE + 1) % 2 == 0);
     if (wkspSize < sizeof(*wksp)) return ERROR(GENERIC);
 
+    /* correct offset to dtable depends on this property */
+    FSE_STATIC_ASSERT(sizeof(FSE_DecompressWksp) % sizeof(FSE_DTable) == 0);
+
     /* normal FSE decoding mode */
-    {
-        size_t const NCountLength = FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
+    {   size_t const NCountLength =
+            FSE_readNCount_bmi2(wksp->ncount, &maxSymbolValue, &tableLog, istart, cSrcSize, bmi2);
         if (FSE_isError(NCountLength)) return NCountLength;
         if (tableLog > maxLog) return ERROR(tableLog_tooLarge);
         assert(NCountLength <= cSrcSize);
@@ -271,16 +273,16 @@ FORCE_INLINE_TEMPLATE size_t FSE_decompress_wksp_body(
     workSpace = (BYTE*)workSpace + sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
     wkspSize -= sizeof(*wksp) + FSE_DTABLE_SIZE(tableLog);
 
-    CHECK_F( FSE_buildDTable_internal(wksp->dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
+    CHECK_F( FSE_buildDTable_internal(dtable, wksp->ncount, maxSymbolValue, tableLog, workSpace, wkspSize) );
 
     {
-        const void* ptr = wksp->dtable;
+        const void* ptr = dtable;
         const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
         const U32 fastMode = DTableH->fastMode;
 
         /* select fast mode (static) */
-        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 1);
-        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, wksp->dtable, 0);
+        if (fastMode) return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 1);
+        return FSE_decompress_usingDTable_generic(dst, dstCapacity, ip, cSrcSize, dtable, 0);
     }
 }
 
diff --git a/thirdparty/zstd/common/huf.h b/thirdparty/zstd/common/huf.h
index 73d1ee5654..99bf85d6f4 100644
--- a/thirdparty/zstd/common/huf.h
+++ b/thirdparty/zstd/common/huf.h
@@ -197,9 +197,22 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
 
 /** HUF_getNbBitsFromCTable() :
  *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
- *  Note 1 : is not inlined, as HUF_CElt definition is private */
+ *  Note 1 : If symbolValue > HUF_readCTableHeader(symbolTable).maxSymbolValue, returns 0
+ *  Note 2 : is not inlined, as HUF_CElt definition is private
+ */
 U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
 
+typedef struct {
+    BYTE tableLog;
+    BYTE maxSymbolValue;
+    BYTE unused[sizeof(size_t) - 2];
+} HUF_CTableHeader;
+
+/** HUF_readCTableHeader() :
+ *  @returns The header from the CTable specifying the tableLog and the maxSymbolValue.
+ */
+HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable);
+
 /*
  * HUF_decompress() does the following:
  * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
diff --git a/thirdparty/zstd/common/mem.h b/thirdparty/zstd/common/mem.h
index 98dd47a047..096f4be519 100644
--- a/thirdparty/zstd/common/mem.h
+++ b/thirdparty/zstd/common/mem.h
@@ -31,15 +31,6 @@ extern "C" {
 #   include <stdlib.h>  /* _byteswap_ulong */
 #   include <intrin.h>  /* _byteswap_* */
 #endif
-#if defined(__GNUC__)
-#  define MEM_STATIC static __inline __attribute__((unused))
-#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#  define MEM_STATIC static inline
-#elif defined(_MSC_VER)
-#  define MEM_STATIC static __inline
-#else
-#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
-#endif
 
 /*-**************************************************************
 *  Basic Types
diff --git a/thirdparty/zstd/common/pool.c b/thirdparty/zstd/common/pool.c
index d5ca5a7808..3adcefc9a5 100644
--- a/thirdparty/zstd/common/pool.c
+++ b/thirdparty/zstd/common/pool.c
@@ -223,7 +223,7 @@ static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
     {   ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_customCalloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
         if (!threadPool) return 1;
         /* replace existing thread pool */
-        ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
+        ZSTD_memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(ZSTD_pthread_t));
         ZSTD_customFree(ctx->threads, ctx->customMem);
         ctx->threads = threadPool;
         /* Initialize additional threads */
diff --git a/thirdparty/zstd/common/pool.h b/thirdparty/zstd/common/pool.h
index eb22ff509f..cca4de73a8 100644
--- a/thirdparty/zstd/common/pool.h
+++ b/thirdparty/zstd/common/pool.h
@@ -47,7 +47,7 @@ void POOL_joinJobs(POOL_ctx* ctx);
 /*! POOL_resize() :
  *  Expands or shrinks pool's number of threads.
  *  This is more efficient than releasing + creating a new context,
- *  since it tries to preserve and re-use existing threads.
+ *  since it tries to preserve and reuse existing threads.
  * `numThreads` must be at least 1.
  * @return : 0 when resize was successful,
  *           !0 (typically 1) if there is an error.
diff --git a/thirdparty/zstd/common/portability_macros.h b/thirdparty/zstd/common/portability_macros.h
index 8fd6ea82d1..e50314a78e 100644
--- a/thirdparty/zstd/common/portability_macros.h
+++ b/thirdparty/zstd/common/portability_macros.h
@@ -68,6 +68,8 @@
 /* Mark the internal assembly functions as hidden  */
 #ifdef __ELF__
 # define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
+#elif defined(__APPLE__)
+# define ZSTD_HIDE_ASM_FUNCTION(func) .private_extern func
 #else
 # define ZSTD_HIDE_ASM_FUNCTION(func)
 #endif
diff --git a/thirdparty/zstd/common/threading.c b/thirdparty/zstd/common/threading.c
index ca155b9b9d..25bb8b9810 100644
--- a/thirdparty/zstd/common/threading.c
+++ b/thirdparty/zstd/common/threading.c
@@ -73,10 +73,12 @@ int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
     ZSTD_thread_params_t thread_param;
     (void)unused;
 
+    if (thread==NULL) return -1;
+    *thread = NULL;
+
     thread_param.start_routine = start_routine;
     thread_param.arg = arg;
     thread_param.initialized = 0;
-    *thread = NULL;
 
     /* Setup thread initialization synchronization */
     if(ZSTD_pthread_cond_init(&thread_param.initialized_cond, NULL)) {
@@ -91,7 +93,7 @@ int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
 
     /* Spawn thread */
     *thread = (HANDLE)_beginthreadex(NULL, 0, worker, &thread_param, 0, NULL);
-    if (!thread) {
+    if (*thread==NULL) {
         ZSTD_pthread_mutex_destroy(&thread_param.initialized_mutex);
         ZSTD_pthread_cond_destroy(&thread_param.initialized_cond);
         return errno;
@@ -137,6 +139,7 @@ int ZSTD_pthread_join(ZSTD_pthread_t thread)
 
 int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t const* attr)
 {
+    assert(mutex != NULL);
     *mutex = (pthread_mutex_t*)ZSTD_malloc(sizeof(pthread_mutex_t));
     if (!*mutex)
         return 1;
@@ -145,6 +148,7 @@ int ZSTD_pthread_mutex_init(ZSTD_pthread_mutex_t* mutex, pthread_mutexattr_t con
 
 int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
 {
+    assert(mutex != NULL);
     if (!*mutex)
         return 0;
     {
@@ -156,6 +160,7 @@ int ZSTD_pthread_mutex_destroy(ZSTD_pthread_mutex_t* mutex)
 
 int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const* attr)
 {
+    assert(cond != NULL);
     *cond = (pthread_cond_t*)ZSTD_malloc(sizeof(pthread_cond_t));
     if (!*cond)
         return 1;
@@ -164,6 +169,7 @@ int ZSTD_pthread_cond_init(ZSTD_pthread_cond_t* cond, pthread_condattr_t const*
 
 int ZSTD_pthread_cond_destroy(ZSTD_pthread_cond_t* cond)
 {
+    assert(cond != NULL);
     if (!*cond)
         return 0;
     {
diff --git a/thirdparty/zstd/common/xxhash.c b/thirdparty/zstd/common/xxhash.c
index fd237c9062..052cd52282 100644
--- a/thirdparty/zstd/common/xxhash.c
+++ b/thirdparty/zstd/common/xxhash.c
@@ -1,24 +1,18 @@
 /*
- *  xxHash - Fast Hash algorithm
- *  Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- *  You can contact the author at :
- *  - xxHash homepage: https://cyan4973.github.io/xxHash/
- *  - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * xxHash - Extremely Fast Hash algorithm
+ * Copyright (c) Yann Collet - Meta Platforms, Inc
  *
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
  * You may select, at your option, one of the above-listed licenses.
-*/
-
-
+ */
 
 /*
  * xxhash.c instantiates functions defined in xxhash.h
  */
 
-#define XXH_STATIC_LINKING_ONLY   /* access advanced declarations */
-#define XXH_IMPLEMENTATION   /* access definitions */
+#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
+#define XXH_IMPLEMENTATION      /* access definitions */
 
 #include "xxhash.h"
diff --git a/thirdparty/zstd/common/xxhash.h b/thirdparty/zstd/common/xxhash.h
index b8b73290bb..e59e44267c 100644
--- a/thirdparty/zstd/common/xxhash.h
+++ b/thirdparty/zstd/common/xxhash.h
@@ -1,17 +1,15 @@
 /*
- *  xxHash - Fast Hash algorithm
- *  Copyright (c) Meta Platforms, Inc. and affiliates.
- *
- *  You can contact the author at :
- *  - xxHash homepage: https://cyan4973.github.io/xxHash/
- *  - xxHash source repository : https://github.com/Cyan4973/xxHash
+ * xxHash - Extremely Fast Hash algorithm
+ * Header File
+ * Copyright (c) Yann Collet - Meta Platforms, Inc
  *
  * This source code is licensed under both the BSD-style license (found in the
  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
  * in the COPYING file in the root directory of this source tree).
  * You may select, at your option, one of the above-listed licenses.
-*/
+ */
 
+/* Local adaptations for Zstandard */
 
 #ifndef XXH_NO_XXH3
 # define XXH_NO_XXH3
@@ -24,46 +22,210 @@
 /*!
  * @mainpage xxHash
  *
+ * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
+ * limits.
+ *
+ * It is proposed in four flavors, in three families:
+ * 1. @ref XXH32_family
+ *   - Classic 32-bit hash function. Simple, compact, and runs on almost all
+ *     32-bit and 64-bit systems.
+ * 2. @ref XXH64_family
+ *   - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
+ *     64-bit systems (but _not_ 32-bit systems).
+ * 3. @ref XXH3_family
+ *   - Modern 64-bit and 128-bit hash function family which features improved
+ *     strength and performance across the board, especially on smaller data.
+ *     It benefits greatly from SIMD and 64-bit without requiring it.
+ *
+ * Benchmarks
+ * ---
+ * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
+ * The open source benchmark program is compiled with clang v10.0 using -O3 flag.
+ *
+ * | Hash Name            | ISA ext | Width | Large Data Speed | Small Data Velocity |
+ * | -------------------- | ------- | ----: | ---------------: | ------------------: |
+ * | XXH3_64bits()        | @b AVX2 |    64 |        59.4 GB/s |               133.1 |
+ * | MeowHash             | AES-NI  |   128 |        58.2 GB/s |                52.5 |
+ * | XXH3_128bits()       | @b AVX2 |   128 |        57.9 GB/s |               118.1 |
+ * | CLHash               | PCLMUL  |    64 |        37.1 GB/s |                58.1 |
+ * | XXH3_64bits()        | @b SSE2 |    64 |        31.5 GB/s |               133.1 |
+ * | XXH3_128bits()       | @b SSE2 |   128 |        29.6 GB/s |               118.1 |
+ * | RAM sequential read  |         |   N/A |        28.0 GB/s |                 N/A |
+ * | ahash                | AES-NI  |    64 |        22.5 GB/s |               107.2 |
+ * | City64               |         |    64 |        22.0 GB/s |                76.6 |
+ * | T1ha2                |         |    64 |        22.0 GB/s |                99.0 |
+ * | City128              |         |   128 |        21.7 GB/s |                57.7 |
+ * | FarmHash             | AES-NI  |    64 |        21.3 GB/s |                71.9 |
+ * | XXH64()              |         |    64 |        19.4 GB/s |                71.0 |
+ * | SpookyHash           |         |    64 |        19.3 GB/s |                53.2 |
+ * | Mum                  |         |    64 |        18.0 GB/s |                67.0 |
+ * | CRC32C               | SSE4.2  |    32 |        13.0 GB/s |                57.9 |
+ * | XXH32()              |         |    32 |         9.7 GB/s |                71.9 |
+ * | City32               |         |    32 |         9.1 GB/s |                66.0 |
+ * | Blake3*              | @b AVX2 |   256 |         4.4 GB/s |                 8.1 |
+ * | Murmur3              |         |    32 |         3.9 GB/s |                56.1 |
+ * | SipHash*             |         |    64 |         3.0 GB/s |                43.2 |
+ * | Blake3*              | @b SSE2 |   256 |         2.4 GB/s |                 8.1 |
+ * | HighwayHash          |         |    64 |         1.4 GB/s |                 6.0 |
+ * | FNV64                |         |    64 |         1.2 GB/s |                62.7 |
+ * | Blake2*              |         |   256 |         1.1 GB/s |                 5.1 |
+ * | SHA1*                |         |   160 |         0.8 GB/s |                 5.6 |
+ * | MD5*                 |         |   128 |         0.6 GB/s |                 7.8 |
+ * @note
+ *   - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
+ *     even though it is mandatory on x64.
+ *   - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
+ *     by modern standards.
+ *   - Small data velocity is a rough average of algorithm's efficiency for small
+ *     data. For more accurate information, see the wiki.
+ *   - More benchmarks and strength tests are found on the wiki:
+ *         https://github.com/Cyan4973/xxHash/wiki
+ *
+ * Usage
+ * ------
+ * All xxHash variants use a similar API. Changing the algorithm is a trivial
+ * substitution.
+ *
+ * @pre
+ *    For functions which take an input and length parameter, the following
+ *    requirements are assumed:
+ *    - The range from [`input`, `input + length`) is valid, readable memory.
+ *      - The only exception is if the `length` is `0`, `input` may be `NULL`.
+ *    - For C++, the objects must have the *TriviallyCopyable* property, as the
+ *      functions access bytes directly as if it was an array of `unsigned char`.
+ *
+ * @anchor single_shot_example
+ * **Single Shot**
+ *
+ * These functions are stateless functions which hash a contiguous block of memory,
+ * immediately returning the result. They are the easiest and usually the fastest
+ * option.
+ *
+ * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
+ *
+ * @code{.c}
+ *   #include <string.h>
+ *   #include "xxhash.h"
+ *
+ *   // Example for a function which hashes a null terminated string with XXH32().
+ *   XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
+ *   {
+ *       // NULL pointers are only valid if the length is zero
+ *       size_t length = (string == NULL) ? 0 : strlen(string);
+ *       return XXH32(string, length, seed);
+ *   }
+ * @endcode
+ *
+ *
+ * @anchor streaming_example
+ * **Streaming**
+ *
+ * These groups of functions allow incremental hashing of unknown size, even
+ * more than what would fit in a size_t.
+ *
+ * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
+ *
+ * @code{.c}
+ *   #include <stdio.h>
+ *   #include <assert.h>
+ *   #include "xxhash.h"
+ *   // Example for a function which hashes a FILE incrementally with XXH3_64bits().
+ *   XXH64_hash_t hashFile(FILE* f)
+ *   {
+ *       // Allocate a state struct. Do not just use malloc() or new.
+ *       XXH3_state_t* state = XXH3_createState();
+ *       assert(state != NULL && "Out of memory!");
+ *       // Reset the state to start a new hashing session.
+ *       XXH3_64bits_reset(state);
+ *       char buffer[4096];
+ *       size_t count;
+ *       // Read the file in chunks
+ *       while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
+ *           // Run update() as many times as necessary to process the data
+ *           XXH3_64bits_update(state, buffer, count);
+ *       }
+ *       // Retrieve the finalized hash. This will not change the state.
+ *       XXH64_hash_t result = XXH3_64bits_digest(state);
+ *       // Free the state. Do not use free().
+ *       XXH3_freeState(state);
+ *       return result;
+ *   }
+ * @endcode
+ *
+ * Streaming functions generate the xxHash value from an incremental input.
+ * This method is slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * An XXH state must first be allocated using `XXH*_createState()`.
+ *
+ * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
+ *
+ * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
+ *
+ * The function returns an error code, with 0 meaning OK, and any other value
+ * meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a
+ * digest, and generate new hash values later on by invoking `XXH*_digest()`.
+ *
+ * When done, release the state using `XXH*_freeState()`.
+ *
+ *
+ * @anchor canonical_representation_example
+ * **Canonical Representation**
+ *
+ * The default return values from XXH functions are unsigned 32, 64 and 128 bit
+ * integers.
+ * This the simplest and fastest format for further post-processing.
+ *
+ * However, this leaves open the question of what is the order on the byte level,
+ * since little and big endian conventions will store the same number differently.
+ *
+ * The canonical representation settles this issue by mandating big-endian
+ * convention, the same convention as human-readable numbers (large digits first).
+ *
+ * When writing hash values to storage, sending them over a network, or printing
+ * them, it's highly recommended to use the canonical representation to ensure
+ * portability across a wider range of systems, present and future.
+ *
+ * The following functions allow transformation of hash values to and from
+ * canonical format.
+ *
+ * XXH32_canonicalFromHash(), XXH32_hashFromCanonical(),
+ * XXH64_canonicalFromHash(), XXH64_hashFromCanonical(),
+ * XXH128_canonicalFromHash(), XXH128_hashFromCanonical(),
+ *
+ * @code{.c}
+ *   #include <stdio.h>
+ *   #include "xxhash.h"
+ *
+ *   // Example for a function which prints XXH32_hash_t in human readable format
+ *   void printXxh32(XXH32_hash_t hash)
+ *   {
+ *       XXH32_canonical_t cano;
+ *       XXH32_canonicalFromHash(&cano, hash);
+ *       size_t i;
+ *       for(i = 0; i < sizeof(cano.digest); ++i) {
+ *           printf("%02x", cano.digest[i]);
+ *       }
+ *       printf("\n");
+ *   }
+ *
+ *   // Example for a function which converts XXH32_canonical_t to XXH32_hash_t
+ *   XXH32_hash_t convertCanonicalToXxh32(XXH32_canonical_t cano)
+ *   {
+ *       XXH32_hash_t hash = XXH32_hashFromCanonical(&cano);
+ *       return hash;
+ *   }
+ * @endcode
+ *
+ *
  * @file xxhash.h
  * xxHash prototypes and implementation
  */
-/* TODO: update */
-/* Notice extracted from xxHash homepage:
-
-xxHash is an extremely fast hash algorithm, running at RAM speed limits.
-It also successfully passes all tests from the SMHasher suite.
-
-Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
-
-Name            Speed       Q.Score   Author
-xxHash          5.4 GB/s     10
-CrapWow         3.2 GB/s      2       Andrew
-MurmurHash 3a   2.7 GB/s     10       Austin Appleby
-SpookyHash      2.0 GB/s     10       Bob Jenkins
-SBox            1.4 GB/s      9       Bret Mulvey
-Lookup3         1.2 GB/s      9       Bob Jenkins
-SuperFastHash   1.2 GB/s      1       Paul Hsieh
-CityHash64      1.05 GB/s    10       Pike & Alakuijala
-FNV             0.55 GB/s     5       Fowler, Noll, Vo
-CRC32           0.43 GB/s     9
-MD5-32          0.33 GB/s    10       Ronald L. Rivest
-SHA1-32         0.28 GB/s    10
-
-Q.Score is a measure of quality of the hash function.
-It depends on successfully passing SMHasher test set.
-10 is a perfect score.
-
-Note: SMHasher's CRC32 implementation is not the fastest one.
-Other speed-oriented implementations can be faster,
-especially in combination with PCLMUL instruction:
-https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html?showComment=1552696407071#c3490092340461170735
-
-A 64-bit version, named XXH64, is available since r35.
-It offers much better speed, but for 64-bit applications only.
-Name     Speed on 64 bits    Speed on 32 bits
-XXH64       13.8 GB/s            1.9 GB/s
-XXH32        6.8 GB/s            6.0 GB/s
-*/
 
 #if defined (__cplusplus)
 extern "C" {
@@ -73,21 +235,80 @@ extern "C" {
  *  INLINE mode
  ******************************/
 /*!
- * XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ * @defgroup public Public API
+ * Contains details on the public xxHash functions.
+ * @{
+ */
+#ifdef XXH_DOXYGEN
+/*!
+ * @brief Gives access to internal state declaration, required for static allocation.
+ *
+ * Incompatible with dynamic linking, due to risks of ABI changes.
+ *
+ * Usage:
+ * @code{.c}
+ *     #define XXH_STATIC_LINKING_ONLY
+ *     #include "xxhash.h"
+ * @endcode
+ */
+#  define XXH_STATIC_LINKING_ONLY
+/* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */
+
+/*!
+ * @brief Gives access to internal definitions.
+ *
+ * Usage:
+ * @code{.c}
+ *     #define XXH_STATIC_LINKING_ONLY
+ *     #define XXH_IMPLEMENTATION
+ *     #include "xxhash.h"
+ * @endcode
+ */
+#  define XXH_IMPLEMENTATION
+/* Do not undef XXH_IMPLEMENTATION for Doxygen */
+
+/*!
+ * @brief Exposes the implementation and marks all functions as `inline`.
+ *
  * Use these build macros to inline xxhash into the target unit.
  * Inlining improves performance on small inputs, especially when the length is
  * expressed as a compile-time constant:
  *
- *      https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
+ *  https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
  *
  * It also keeps xxHash symbols private to the unit, so they are not exported.
  *
  * Usage:
+ * @code{.c}
  *     #define XXH_INLINE_ALL
  *     #include "xxhash.h"
- *
+ * @endcode
  * Do not compile and link xxhash.o as a separate object, as it is not useful.
  */
+#  define XXH_INLINE_ALL
+#  undef XXH_INLINE_ALL
+/*!
+ * @brief Exposes the implementation without marking functions as inline.
+ */
+#  define XXH_PRIVATE_API
+#  undef XXH_PRIVATE_API
+/*!
+ * @brief Emulate a namespace by transparently prefixing all symbols.
+ *
+ * If you want to include _and expose_ xxHash functions from within your own
+ * library, but also want to avoid symbol collisions with other libraries which
+ * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
+ * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
+ * (therefore, avoid empty or numeric values).
+ *
+ * Note that no change is required within the calling program as long as it
+ * includes `xxhash.h`: Regular symbol names will be automatically translated
+ * by this header.
+ */
+#  define XXH_NAMESPACE /* YOUR NAME HERE */
+#  undef XXH_NAMESPACE
+#endif
+
 #if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
     && !defined(XXH_INLINE_ALL_31684351384)
    /* this section should be traversed only once */
@@ -202,21 +423,13 @@ extern "C" {
 #  undef XXHASH_H_STATIC_13879238742
 #endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
 
-
-
 /* ****************************************************************
  *  Stable API
  *****************************************************************/
 #ifndef XXHASH_H_5627135585666179
 #define XXHASH_H_5627135585666179 1
 
-
-/*!
- * @defgroup public Public API
- * Contains details on the public xxHash functions.
- * @{
- */
-/* specific declaration modes for Windows */
+/*! @brief Marks a global symbol. */
 #if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
 #  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
 #    ifdef XXH_EXPORT
@@ -229,24 +442,6 @@ extern "C" {
 #  endif
 #endif
 
-#ifdef XXH_DOXYGEN
-/*!
- * @brief Emulate a namespace by transparently prefixing all symbols.
- *
- * If you want to include _and expose_ xxHash functions from within your own
- * library, but also want to avoid symbol collisions with other libraries which
- * may also include xxHash, you can use XXH_NAMESPACE to automatically prefix
- * any public symbol from xxhash library with the value of XXH_NAMESPACE
- * (therefore, avoid empty or numeric values).
- *
- * Note that no change is required within the calling program as long as it
- * includes `xxhash.h`: Regular symbol names will be automatically translated
- * by this header.
- */
-#  define XXH_NAMESPACE /* YOUR NAME HERE */
-#  undef XXH_NAMESPACE
-#endif
-
 #ifdef XXH_NAMESPACE
 #  define XXH_CAT(A,B) A##B
 #  define XXH_NAME2(A,B) XXH_CAT(A,B)
@@ -307,11 +502,39 @@ extern "C" {
 
 
 /* *************************************
+*  Compiler specifics
+***************************************/
+
+/* specific declaration modes for Windows */
+#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
+#  if defined(WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
+#    ifdef XXH_EXPORT
+#      define XXH_PUBLIC_API __declspec(dllexport)
+#    elif XXH_IMPORT
+#      define XXH_PUBLIC_API __declspec(dllimport)
+#    endif
+#  else
+#    define XXH_PUBLIC_API   /* do nothing */
+#  endif
+#endif
+
+#if defined (__GNUC__)
+# define XXH_CONSTF  __attribute__((const))
+# define XXH_PUREF   __attribute__((pure))
+# define XXH_MALLOCF __attribute__((malloc))
+#else
+# define XXH_CONSTF  /* disable */
+# define XXH_PUREF
+# define XXH_MALLOCF
+#endif
+
+/* *************************************
 *  Version
 ***************************************/
 #define XXH_VERSION_MAJOR    0
 #define XXH_VERSION_MINOR    8
-#define XXH_VERSION_RELEASE  1
+#define XXH_VERSION_RELEASE  2
+/*! @brief Version number, encoded as two digits each */
 #define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
 
 /*!
@@ -320,16 +543,22 @@ extern "C" {
  * This is mostly useful when xxHash is compiled as a shared library,
  * since the returned value comes from the library, as opposed to header file.
  *
- * @return `XXH_VERSION_NUMBER` of the invoked library.
+ * @return @ref XXH_VERSION_NUMBER of the invoked library.
  */
-XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
 
 
 /* ****************************
 *  Common basic types
 ******************************/
 #include <stddef.h>   /* size_t */
-typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+/*!
+ * @brief Exit code for the streaming API.
+ */
+typedef enum {
+    XXH_OK = 0, /*!< OK */
+    XXH_ERROR   /*!< Error */
+} XXH_errorcode;
 
 
 /*-**********************************************************************
@@ -346,44 +575,44 @@ typedef uint32_t XXH32_hash_t;
 #elif !defined (__VMS) \
   && (defined (__cplusplus) \
   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#   include <stdint.h>
+#   ifdef _AIX
+#     include <inttypes.h>
+#   else
+#     include <stdint.h>
+#   endif
     typedef uint32_t XXH32_hash_t;
 
 #else
 #   include <limits.h>
 #   if UINT_MAX == 0xFFFFFFFFUL
       typedef unsigned int XXH32_hash_t;
+#   elif ULONG_MAX == 0xFFFFFFFFUL
+      typedef unsigned long XXH32_hash_t;
 #   else
-#     if ULONG_MAX == 0xFFFFFFFFUL
-        typedef unsigned long XXH32_hash_t;
-#     else
-#       error "unsupported platform: need a 32-bit type"
-#     endif
+#     error "unsupported platform: need a 32-bit type"
 #   endif
 #endif
 
 /*!
  * @}
  *
- * @defgroup xxh32_family XXH32 family
+ * @defgroup XXH32_family XXH32 family
  * @ingroup public
  * Contains functions used in the classic 32-bit xxHash algorithm.
  *
  * @note
  *   XXH32 is useful for older platforms, with no or poor 64-bit performance.
- *   Note that @ref xxh3_family provides competitive speed
- *   for both 32-bit and 64-bit systems, and offers true 64/128 bit hash results.
+ *   Note that the @ref XXH3_family provides competitive speed for both 32-bit
+ *   and 64-bit systems, and offers true 64/128 bit hash results.
  *
- * @see @ref xxh64_family, @ref xxh3_family : Other xxHash families
- * @see @ref xxh32_impl for implementation details
+ * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
+ * @see @ref XXH32_impl for implementation details
  * @{
  */
 
 /*!
  * @brief Calculates the 32-bit hash of @p input using xxHash32.
  *
- * Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark): 5.4 GB/s
- *
  * @param input The block of data to be hashed, at least @p length bytes in size.
  * @param length The length of @p input, in bytes.
  * @param seed The 32-bit seed to alter the hash's output predictably.
@@ -393,66 +622,13 @@ typedef uint32_t XXH32_hash_t;
  *   readable, contiguous memory. However, if @p length is `0`, @p input may be
  *   `NULL`. In C++, this also must be *TriviallyCopyable*.
  *
- * @return The calculated 32-bit hash value.
- *
- * @see
- *    XXH64(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
- *    Direct equivalents for the other variants of xxHash.
- * @see
- *    XXH32_createState(), XXH32_update(), XXH32_digest(): Streaming version.
- */
-XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
-
-/*!
- * Streaming functions generate the xxHash value from an incremental input.
- * This method is slower than single-call functions, due to state management.
- * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
- *
- * An XXH state must first be allocated using `XXH*_createState()`.
- *
- * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
- *
- * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
- *
- * The function returns an error code, with 0 meaning OK, and any other value
- * meaning there is an error.
- *
- * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
- * This function returns the nn-bits hash as an int or long long.
- *
- * It's still possible to continue inserting input into the hash state after a
- * digest, and generate new hash values later on by invoking `XXH*_digest()`.
- *
- * When done, release the state using `XXH*_freeState()`.
- *
- * Example code for incrementally hashing a file:
- * @code{.c}
- *    #include <stdio.h>
- *    #include <xxhash.h>
- *    #define BUFFER_SIZE 256
+ * @return The calculated 32-bit xxHash32 value.
  *
- *    // Note: XXH64 and XXH3 use the same interface.
- *    XXH32_hash_t
- *    hashFile(FILE* stream)
- *    {
- *        XXH32_state_t* state;
- *        unsigned char buf[BUFFER_SIZE];
- *        size_t amt;
- *        XXH32_hash_t hash;
- *
- *        state = XXH32_createState();       // Create a state
- *        assert(state != NULL);             // Error check here
- *        XXH32_reset(state, 0xbaad5eed);    // Reset state with our seed
- *        while ((amt = fread(buf, 1, sizeof(buf), stream)) != 0) {
- *            XXH32_update(state, buf, amt); // Hash the file in chunks
- *        }
- *        hash = XXH32_digest(state);        // Finalize the hash
- *        XXH32_freeState(state);            // Clean up
- *        return hash;
- *    }
- * @endcode
+ * @see @ref single_shot_example "Single Shot Example" for an example.
  */
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
 
+#ifndef XXH_NO_STREAM
 /*!
  * @typedef struct XXH32_state_s XXH32_state_t
  * @brief The opaque state struct for the XXH32 streaming API.
@@ -464,16 +640,21 @@ typedef struct XXH32_state_s XXH32_state_t;
 /*!
  * @brief Allocates an @ref XXH32_state_t.
  *
- * Must be freed with XXH32_freeState().
- * @return An allocated XXH32_state_t on success, `NULL` on failure.
+ * @return An allocated pointer of @ref XXH32_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH32_freeState().
  */
-XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
 /*!
  * @brief Frees an @ref XXH32_state_t.
  *
- * Must be allocated with XXH32_createState().
  * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
- * @return XXH_OK.
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note @p statePtr must be allocated with XXH32_createState().
+ *
  */
 XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
 /*!
@@ -489,23 +670,22 @@ XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_
 /*!
  * @brief Resets an @ref XXH32_state_t to begin a new hash.
  *
- * This function resets and seeds a state. Call it before @ref XXH32_update().
- *
  * @param statePtr The state struct to reset.
  * @param seed The 32-bit seed to alter the hash result predictably.
  *
  * @pre
  *   @p statePtr must not be `NULL`.
  *
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note This function resets and seeds a state. Call it before @ref XXH32_update().
  */
 XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
 
 /*!
  * @brief Consumes a block of @p input to an @ref XXH32_state_t.
  *
- * Call this to incrementally consume blocks of data.
- *
  * @param statePtr The state struct to update.
  * @param input The block of data to be hashed, at least @p length bytes in size.
  * @param length The length of @p input, in bytes.
@@ -517,47 +697,32 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t
  *   readable, contiguous memory. However, if @p length is `0`, @p input may be
  *   `NULL`. In C++, this also must be *TriviallyCopyable*.
  *
- * @return @ref XXH_OK on success, @ref XXH_ERROR on failure.
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
  */
 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
 
 /*!
  * @brief Returns the calculated hash value from an @ref XXH32_state_t.
  *
- * @note
- *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
- *   digest, and update again.
- *
  * @param statePtr The state struct to calculate the hash from.
  *
  * @pre
  *  @p statePtr must not be `NULL`.
  *
- * @return The calculated xxHash32 value from that state.
+ * @return The calculated 32-bit xxHash32 value from that state.
+ *
+ * @note
+ *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
  */
-XXH_PUBLIC_API XXH32_hash_t  XXH32_digest (const XXH32_state_t* statePtr);
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
 
 /*******   Canonical representation   *******/
 
-/*
- * The default return values from XXH functions are unsigned 32 and 64 bit
- * integers.
- * This the simplest and fastest format for further post-processing.
- *
- * However, this leaves open the question of what is the order on the byte level,
- * since little and big endian conventions will store the same number differently.
- *
- * The canonical representation settles this issue by mandating big-endian
- * convention, the same convention as human-readable numbers (large digits first).
- *
- * When writing hash values to storage, sending them over a network, or printing
- * them, it's highly recommended to use the canonical representation to ensure
- * portability across a wider range of systems, present and future.
- *
- * The following functions allow transformation of hash values to and from
- * canonical format.
- */
-
 /*!
  * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
  */
@@ -568,11 +733,13 @@ typedef struct {
 /*!
  * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
  *
- * @param dst The @ref XXH32_canonical_t pointer to be stored to.
+ * @param dst  The @ref XXH32_canonical_t pointer to be stored to.
  * @param hash The @ref XXH32_hash_t to be converted.
  *
  * @pre
  *   @p dst must not be `NULL`.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
  */
 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
 
@@ -585,44 +752,75 @@ XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t
  *   @p src must not be `NULL`.
  *
  * @return The converted hash.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
  */
-XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
 
 
+/*! @cond Doxygen ignores this part */
 #ifdef __has_attribute
 # define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
 #else
 # define XXH_HAS_ATTRIBUTE(x) 0
 #endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * C23 __STDC_VERSION__ number hasn't been specified yet. For now
+ * leave as `201711L` (C17 + 1).
+ * TODO: Update to correct value when its been specified.
+ */
+#define XXH_C23_VN 201711L
+/*! @endcond */
 
+/*! @cond Doxygen ignores this part */
 /* C-language Attributes are added in C23. */
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
+#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
 # define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
 #else
 # define XXH_HAS_C_ATTRIBUTE(x) 0
 #endif
+/*! @endcond */
 
+/*! @cond Doxygen ignores this part */
 #if defined(__cplusplus) && defined(__has_cpp_attribute)
 # define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
 #else
 # define XXH_HAS_CPP_ATTRIBUTE(x) 0
 #endif
+/*! @endcond */
 
+/*! @cond Doxygen ignores this part */
 /*
-Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
-introduced in CPP17 and C23.
-CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
-C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
-*/
-#if XXH_HAS_C_ATTRIBUTE(x)
-# define XXH_FALLTHROUGH [[fallthrough]]
-#elif XXH_HAS_CPP_ATTRIBUTE(x)
+ * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
+ * introduced in CPP17 and C23.
+ * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
+ * C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
+ */
+#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
 # define XXH_FALLTHROUGH [[fallthrough]]
 #elif XXH_HAS_ATTRIBUTE(__fallthrough__)
-# define XXH_FALLTHROUGH __attribute__ ((fallthrough))
+# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
 #else
-# define XXH_FALLTHROUGH
+# define XXH_FALLTHROUGH /* fallthrough */
 #endif
+/*! @endcond */
+
+/*! @cond Doxygen ignores this part */
+/*
+ * Define XXH_NOESCAPE for annotated pointers in public API.
+ * https://clang.llvm.org/docs/AttributeReference.html#noescape
+ * As of writing this, only supported by clang.
+ */
+#if XXH_HAS_ATTRIBUTE(noescape)
+# define XXH_NOESCAPE __attribute__((noescape))
+#else
+# define XXH_NOESCAPE
+#endif
+/*! @endcond */
+
 
 /*!
  * @}
@@ -644,7 +842,11 @@ typedef uint64_t XXH64_hash_t;
 #elif !defined (__VMS) \
   && (defined (__cplusplus) \
   || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#  include <stdint.h>
+#   ifdef _AIX
+#     include <inttypes.h>
+#   else
+#     include <stdint.h>
+#   endif
    typedef uint64_t XXH64_hash_t;
 #else
 #  include <limits.h>
@@ -660,7 +862,7 @@ typedef uint64_t XXH64_hash_t;
 /*!
  * @}
  *
- * @defgroup xxh64_family XXH64 family
+ * @defgroup XXH64_family XXH64 family
  * @ingroup public
  * @{
  * Contains functions used in the classic 64-bit xxHash algorithm.
@@ -671,13 +873,9 @@ typedef uint64_t XXH64_hash_t;
  *   It provides better speed for systems with vector processing capabilities.
  */
 
-
 /*!
  * @brief Calculates the 64-bit hash of @p input using xxHash64.
  *
- * This function usually runs faster on 64-bit systems, but slower on 32-bit
- * systems (see benchmark).
- *
  * @param input The block of data to be hashed, at least @p length bytes in size.
  * @param length The length of @p input, in bytes.
  * @param seed The 64-bit seed to alter the hash's output predictably.
@@ -687,41 +885,145 @@ typedef uint64_t XXH64_hash_t;
  *   readable, contiguous memory. However, if @p length is `0`, @p input may be
  *   `NULL`. In C++, this also must be *TriviallyCopyable*.
  *
- * @return The calculated 64-bit hash.
+ * @return The calculated 64-bit xxHash64 value.
  *
- * @see
- *    XXH32(), XXH3_64bits_withSeed(), XXH3_128bits_withSeed(), XXH128():
- *    Direct equivalents for the other variants of xxHash.
- * @see
- *    XXH64_createState(), XXH64_update(), XXH64_digest(): Streaming version.
+ * @see @ref single_shot_example "Single Shot Example" for an example.
  */
-XXH_PUBLIC_API XXH64_hash_t XXH64(const void* input, size_t length, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
 
 /*******   Streaming   *******/
+#ifndef XXH_NO_STREAM
 /*!
  * @brief The opaque state struct for the XXH64 streaming API.
  *
  * @see XXH64_state_s for details.
  */
 typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
-XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+
+/*!
+ * @brief Allocates an @ref XXH64_state_t.
+ *
+ * @return An allocated pointer of @ref XXH64_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH64_freeState().
+ */
+XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
+
+/*!
+ * @brief Frees an @ref XXH64_state_t.
+ *
+ * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState().
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note @p statePtr must be allocated with XXH64_createState().
+ */
 XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
-XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
 
-XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH64_state_t* statePtr, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t  XXH64_digest (const XXH64_state_t* statePtr);
+/*!
+ * @brief Copies one @ref XXH64_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
+ */
+XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH64_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note This function resets and seeds a state. Call it before @ref XXH64_update().
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
 
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH64_state_t.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated hash value from an @ref XXH64_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ *  @p statePtr must not be `NULL`.
+ *
+ * @return The calculated 64-bit xxHash64 value from that state.
+ *
+ * @note
+ *   Calling XXH64_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
 /*******   Canonical representation   *******/
+
+/*!
+ * @brief Canonical (big endian) representation of @ref XXH64_hash_t.
+ */
 typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+
+/*!
+ * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t.
+ *
+ * @param dst The @ref XXH64_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH64_hash_t to be converted.
+ *
+ * @pre
+ *   @p dst must not be `NULL`.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t.
+ *
+ * @param src The @ref XXH64_canonical_t to convert.
+ *
+ * @pre
+ *   @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ *
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
 
 #ifndef XXH_NO_XXH3
+
 /*!
  * @}
  * ************************************************************************
- * @defgroup xxh3_family XXH3 family
+ * @defgroup XXH3_family XXH3 family
  * @ingroup public
  * @{
  *
@@ -741,16 +1043,26 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
  *
  * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
  * but does not require it.
- * Any 32-bit and 64-bit targets that can run XXH32 smoothly
- * can run XXH3 at competitive speeds, even without vector support.
- * Further details are explained in the implementation.
- *
- * Optimized implementations are provided for AVX512, AVX2, SSE2, NEON, POWER8,
- * ZVector and scalar targets. This can be controlled via the XXH_VECTOR macro.
+ * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
+ * at competitive speeds, even without vector support. Further details are
+ * explained in the implementation.
+ *
+ * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD
+ * implementations for many common platforms:
+ *   - AVX512
+ *   - AVX2
+ *   - SSE2
+ *   - ARM NEON
+ *   - WebAssembly SIMD128
+ *   - POWER8 VSX
+ *   - s390x ZVector
+ * This can be controlled via the @ref XXH_VECTOR macro, but it automatically
+ * selects the best version according to predefined macros. For the x86 family, an
+ * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c.
  *
  * XXH3 implementation is portable:
  * it has a generic C90 formulation that can be compiled on any platform,
- * all implementations generage exactly the same hash value on all platforms.
+ * all implementations generate exactly the same hash value on all platforms.
  * Starting from v0.8.0, it's also labelled "stable", meaning that
  * any future version will also generate the same hash value.
  *
@@ -762,24 +1074,59 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
  *
  * The API supports one-shot hashing, streaming mode, and custom secrets.
  */
-
 /*-**********************************************************************
 *  XXH3 64-bit variant
 ************************************************************************/
 
-/* XXH3_64bits():
- * default 64-bit variant, using default secret and default seed of 0.
- * It's the fastest variant. */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* data, size_t len);
+/*!
+ * @brief Calculates 64-bit unseeded variant of XXH3 hash of @p input.
+ *
+ * @param input  The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @note
+ *   This is equivalent to @ref XXH3_64bits_withSeed() with a seed of `0`, however
+ *   it may have slightly better performance due to constant propagation of the
+ *   defaults.
+ *
+ * @see
+ *    XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
 
-/*
- * XXH3_64bits_withSeed():
- * This variant generates a custom secret on the fly
- * based on default secret altered using the `seed` value.
+/*!
+ * @brief Calculates 64-bit seeded variant of XXH3 hash of @p input.
+ *
+ * @param input  The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ * @param seed   The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @note
+ *    seed == 0 produces the same results as @ref XXH3_64bits().
+ *
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the @p seed value.
+ *
  * While this operation is decently fast, note that it's not completely free.
- * Note: seed==0 produces the same results as XXH3_64bits().
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
  */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
 
 /*!
  * The bare minimum size for a custom secret.
@@ -790,27 +1137,43 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSeed(const void* data, size_t len, X
  */
 #define XXH3_SECRET_SIZE_MIN 136
 
-/*
- * XXH3_64bits_withSecret():
+/*!
+ * @brief Calculates 64-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data       The block of data to be hashed, at least @p len bytes in size.
+ * @param len        The length of @p data, in bytes.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 64-bit XXH3 hash value.
+ *
+ * @pre
+ *   The memory between @p data and @p data + @p len must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p data may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
  * It's possible to provide any blob of bytes as a "secret" to generate the hash.
  * This makes it more difficult for an external actor to prepare an intentional collision.
- * The main condition is that secretSize *must* be large enough (>= XXH3_SECRET_SIZE_MIN).
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
  * However, the quality of the secret impacts the dispersion of the hash algorithm.
  * Therefore, the secret _must_ look like a bunch of random bytes.
  * Avoid "trivial" or structured data such as repeated sequences or a text document.
  * Whenever in doubt about the "randomness" of the blob of bytes,
- * consider employing "XXH3_generateSecret()" instead (see below).
+ * consider employing @ref XXH3_generateSecret() instead (see below).
  * It will generate a proper high entropy secret derived from the blob of bytes.
  * Another advantage of using XXH3_generateSecret() is that
  * it guarantees that all bits within the initial blob of bytes
  * will impact every bit of the output.
  * This is not necessarily the case when using the blob of bytes directly
  * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
  */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
 
 
 /*******   Streaming   *******/
+#ifndef XXH_NO_STREAM
 /*
  * Streaming requires state maintenance.
  * This operation costs memory and CPU.
@@ -819,40 +1182,124 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_withSecret(const void* data, size_t len,
  */
 
 /*!
- * @brief The state struct for the XXH3 streaming API.
+ * @brief The opaque state struct for the XXH3 streaming API.
  *
  * @see XXH3_state_s for details.
  */
 typedef struct XXH3_state_s XXH3_state_t;
-XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void);
+XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
 XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
-XXH_PUBLIC_API void XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state);
 
-/*
- * XXH3_64bits_reset():
- * Initialize with default parameters.
- * digest will be equivalent to `XXH3_64bits()`.
+/*!
+ * @brief Copies one @ref XXH3_state_t to another.
+ *
+ * @param dst_state The state to copy to.
+ * @param src_state The state to copy from.
+ * @pre
+ *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH3_state_t* statePtr);
-/*
- * XXH3_64bits_reset_withSeed():
- * Generate a custom secret from `seed`, and store it into `statePtr`.
- * digest will be equivalent to `XXH3_64bits_withSeed()`.
+XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generate a secret with default parameters.
+ *   - Call this function before @ref XXH3_64bits_update().
+ *   - Digest will be equivalent to `XXH3_64bits()`.
+ *
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
-/*
- * XXH3_64bits_reset_withSecret():
- * `secret` is referenced, it _must outlive_ the hash streaming session.
- * Similar to one-shot API, `secretSize` must be >= `XXH3_SECRET_SIZE_MIN`,
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed     The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generate a secret from `seed`.
+ *   - Call this function before @ref XXH3_64bits_update().
+ *   - Digest will be equivalent to `XXH3_64bits_withSeed()`.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   `secret` is referenced, it _must outlive_ the hash streaming session.
+ *
+ * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
  * and the quality of produced hash values depends on secret's entropy
  * (secret's content should look like a bunch of random bytes).
  * When in doubt about the randomness of a candidate `secret`,
  * consider employing `XXH3_generateSecret()` instead (see below).
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
 
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH64_hash_t  XXH3_64bits_digest (const XXH3_state_t* statePtr);
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ * @pre
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note Call this to incrementally consume blocks of data.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
+
+/*!
+ * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ *  @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 64-bit hash value from that state.
+ *
+ * @note
+ *   Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t  XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
 
 /* note : canonical representation of XXH3 is the same as XXH64
  * since they both produce XXH64_hash_t values */
@@ -873,11 +1320,76 @@ typedef struct {
     XXH64_hash_t high64;  /*!< `value >> 64` */
 } XXH128_hash_t;
 
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* data, size_t len);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSeed(const void* data, size_t len, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t len, const void* secret, size_t secretSize);
+/*!
+ * @brief Calculates 128-bit unseeded variant of XXH3 of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p length bytes in size.
+ * @param len  The length of @p data, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
+ * for shorter inputs.
+ *
+ * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of `0`, however
+ * it may have slightly better performance due to constant propagation of the
+ * defaults.
+ *
+ * @see XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
+/*! @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data The block of data to be hashed, at least @p length bytes in size.
+ * @param len  The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash result predictably.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * @note
+ *    seed == 0 produces the same results as @ref XXH3_64bits().
+ *
+ * This variant generates a custom secret on the fly based on default secret
+ * altered using the @p seed value.
+ *
+ * While this operation is decently fast, note that it's not completely free.
+ *
+ * @see XXH3_128bits(), XXH3_128bits_withSecret(): other seeding variants
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
+/*!
+ * @brief Calculates 128-bit variant of XXH3 with a custom "secret".
+ *
+ * @param data       The block of data to be hashed, at least @p len bytes in size.
+ * @param len        The length of @p data, in bytes.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @return The calculated 128-bit variant of XXH3 value.
+ *
+ * It's possible to provide any blob of bytes as a "secret" to generate the hash.
+ * This makes it more difficult for an external actor to prepare an intentional collision.
+ * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
+ * However, the quality of the secret impacts the dispersion of the hash algorithm.
+ * Therefore, the secret _must_ look like a bunch of random bytes.
+ * Avoid "trivial" or structured data such as repeated sequences or a text document.
+ * Whenever in doubt about the "randomness" of the blob of bytes,
+ * consider employing @ref XXH3_generateSecret() instead (see below).
+ * It will generate a proper high entropy secret derived from the blob of bytes.
+ * Another advantage of using XXH3_generateSecret() is that
+ * it guarantees that all bits within the initial blob of bytes
+ * will impact every bit of the output.
+ * This is not necessarily the case when using the blob of bytes directly
+ * because, when hashing _small_ inputs, only a portion of the secret is employed.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
 
 /*******   Streaming   *******/
+#ifndef XXH_NO_STREAM
 /*
  * Streaming requires state maintenance.
  * This operation costs memory and CPU.
@@ -890,39 +1402,163 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_withSecret(const void* data, size_t le
  * All reset and streaming functions have same meaning as their 64-bit counterpart.
  */
 
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH3_state_t* statePtr);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed);
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize);
+/*!
+ * @brief Resets an @ref XXH3_state_t to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generate a secret with default parameters.
+ *   - Call it before @ref XXH3_128bits_update().
+ *   - Digest will be equivalent to `XXH3_128bits()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
+
+/*!
+ * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
+ *
+ * @param statePtr The state struct to reset.
+ * @param seed     The 64-bit seed to alter the hash result predictably.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   - This function resets `statePtr` and generate a secret from `seed`.
+ *   - Call it before @ref XXH3_128bits_update().
+ *   - Digest will be equivalent to `XXH3_128bits_withSeed()`.
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr   The state struct to reset.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * `secret` is referenced, it _must outlive_ the hash streaming session.
+ * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
+ * and the quality of produced hash values depends on secret's entropy
+ * (secret's content should look like a bunch of random bytes).
+ * When in doubt about the randomness of a candidate `secret`,
+ * consider employing `XXH3_generateSecret()` instead (see below).
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
+
+/*!
+ * @brief Consumes a block of @p input to an @ref XXH3_state_t.
+ *
+ * Call this to incrementally consume blocks of data.
+ *
+ * @param statePtr The state struct to update.
+ * @param input The block of data to be hashed, at least @p length bytes in size.
+ * @param length The length of @p input, in bytes.
+ *
+ * @pre
+ *   @p statePtr must not be `NULL`.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @note
+ *   The memory between @p input and @p input + @p length must be valid,
+ *   readable, contiguous memory. However, if @p length is `0`, @p input may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ */
+XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
 
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH3_state_t* statePtr, const void* input, size_t length);
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* statePtr);
+/*!
+ * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
+ *
+ * @param statePtr The state struct to calculate the hash from.
+ *
+ * @pre
+ *  @p statePtr must not be `NULL`.
+ *
+ * @return The calculated XXH3 128-bit hash value from that state.
+ *
+ * @note
+ *   Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
+ *   digest, and update again.
+ *
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
+#endif /* !XXH_NO_STREAM */
 
 /* Following helper functions make it possible to compare XXH128_hast_t values.
  * Since XXH128_hash_t is a structure, this capability is not offered by the language.
  * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
 
 /*!
- * XXH128_isEqual():
- * Return: 1 if `h1` and `h2` are equal, 0 if they are not.
+ * @brief Check equality of two XXH128_hash_t values
+ *
+ * @param h1 The 128-bit hash value.
+ * @param h2 Another 128-bit hash value.
+ *
+ * @return `1` if `h1` and `h2` are equal.
+ * @return `0` if they are not.
  */
-XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
+XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
 
 /*!
- * XXH128_cmp():
+ * @brief Compares two @ref XXH128_hash_t
  *
  * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
  *
- * return: >0 if *h128_1  > *h128_2
- *         =0 if *h128_1 == *h128_2
- *         <0 if *h128_1  < *h128_2
+ * @param h128_1 Left-hand side value
+ * @param h128_2 Right-hand side value
+ *
+ * @return >0 if @p h128_1  > @p h128_2
+ * @return =0 if @p h128_1 == @p h128_2
+ * @return <0 if @p h128_1  < @p h128_2
  */
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2);
+XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
 
 
 /*******   Canonical representation   *******/
 typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
-XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash);
-XXH_PUBLIC_API XXH128_hash_t XXH128_hashFromCanonical(const XXH128_canonical_t* src);
+
+
+/*!
+ * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
+ *
+ * @param dst  The @ref XXH128_canonical_t pointer to be stored to.
+ * @param hash The @ref XXH128_hash_t to be converted.
+ *
+ * @pre
+ *   @p dst must not be `NULL`.
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
+
+/*!
+ * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t.
+ *
+ * @param src The @ref XXH128_canonical_t to convert.
+ *
+ * @pre
+ *   @p src must not be `NULL`.
+ *
+ * @return The converted hash.
+ * @see @ref canonical_representation_example "Canonical Representation Example"
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
 
 
 #endif  /* !XXH_NO_XXH3 */
@@ -996,7 +1632,6 @@ struct XXH64_state_s {
    XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it. */
 };   /* typedef'd to XXH64_state_t */
 
-
 #ifndef XXH_NO_XXH3
 
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
@@ -1032,6 +1667,7 @@ struct XXH64_state_s {
 #define XXH3_INTERNALBUFFER_SIZE 256
 
 /*!
+ * @internal
  * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
  *
  * This is the size used in @ref XXH3_kSecret and the seeded functions.
@@ -1064,7 +1700,7 @@ struct XXH64_state_s {
  */
 struct XXH3_state_s {
    XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
-       /*!< The 8 accumulators. Similar to `vN` in @ref XXH32_state_s::v1 and @ref XXH64_state_s */
+       /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
    XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
        /*!< Used to store a custom secret generated from a seed. */
    XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
@@ -1104,69 +1740,148 @@ struct XXH3_state_s {
  * Note that this doesn't prepare the state for a streaming operation,
  * it's still necessary to use XXH3_NNbits_reset*() afterwards.
  */
-#define XXH3_INITSTATE(XXH3_state_ptr)   { (XXH3_state_ptr)->seed = 0; }
+#define XXH3_INITSTATE(XXH3_state_ptr)                       \
+    do {                                                     \
+        XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
+        tmp_xxh3_state_ptr->seed = 0;                        \
+        tmp_xxh3_state_ptr->extSecret = NULL;                \
+    } while(0)
 
 
-/* XXH128() :
- * simple alias to pre-selected XXH3_128bits variant
+/*!
+ * @brief Calculates the 128-bit hash of @p data using XXH3.
+ *
+ * @param data The block of data to be hashed, at least @p len bytes in size.
+ * @param len  The length of @p data, in bytes.
+ * @param seed The 64-bit seed to alter the hash's output predictably.
+ *
+ * @pre
+ *   The memory between @p data and @p data + @p len must be valid,
+ *   readable, contiguous memory. However, if @p len is `0`, @p data may be
+ *   `NULL`. In C++, this also must be *TriviallyCopyable*.
+ *
+ * @return The calculated 128-bit XXH3 value.
+ *
+ * @see @ref single_shot_example "Single Shot Example" for an example.
  */
-XXH_PUBLIC_API XXH128_hash_t XXH128(const void* data, size_t len, XXH64_hash_t seed);
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
 
 
 /* ===   Experimental API   === */
 /* Symbols defined below must be considered tied to a specific library version. */
 
-/*
- * XXH3_generateSecret():
+/*!
+ * @brief Derive a high-entropy secret from any user-defined content, named customSeed.
+ *
+ * @param secretBuffer    A writable buffer for derived high-entropy secret data.
+ * @param secretSize      Size of secretBuffer, in bytes.  Must be >= XXH3_SECRET_DEFAULT_SIZE.
+ * @param customSeed      A user-defined content.
+ * @param customSeedSize  Size of customSeed, in bytes.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
  *
- * Derive a high-entropy secret from any user-defined content, named customSeed.
  * The generated secret can be used in combination with `*_withSecret()` functions.
- * The `_withSecret()` variants are useful to provide a higher level of protection than 64-bit seed,
- * as it becomes much more difficult for an external actor to guess how to impact the calculation logic.
+ * The `_withSecret()` variants are useful to provide a higher level of protection
+ * than 64-bit seed, as it becomes much more difficult for an external actor to
+ * guess how to impact the calculation logic.
  *
  * The function accepts as input a custom seed of any length and any content,
- * and derives from it a high-entropy secret of length @secretSize
- * into an already allocated buffer @secretBuffer.
- * @secretSize must be >= XXH3_SECRET_SIZE_MIN
+ * and derives from it a high-entropy secret of length @p secretSize into an
+ * already allocated buffer @p secretBuffer.
  *
  * The generated secret can then be used with any `*_withSecret()` variant.
- * Functions `XXH3_128bits_withSecret()`, `XXH3_64bits_withSecret()`,
- * `XXH3_128bits_reset_withSecret()` and `XXH3_64bits_reset_withSecret()`
+ * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
+ * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
  * are part of this list. They all accept a `secret` parameter
- * which must be large enough for implementation reasons (>= XXH3_SECRET_SIZE_MIN)
+ * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
  * _and_ feature very high entropy (consist of random-looking bytes).
- * These conditions can be a high bar to meet, so
- * XXH3_generateSecret() can be employed to ensure proper quality.
+ * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
+ * be employed to ensure proper quality.
+ *
+ * @p customSeed can be anything. It can have any size, even small ones,
+ * and its content can be anything, even "poor entropy" sources such as a bunch
+ * of zeroes. The resulting `secret` will nonetheless provide all required qualities.
  *
- * customSeed can be anything. It can have any size, even small ones,
- * and its content can be anything, even "poor entropy" sources such as a bunch of zeroes.
- * The resulting `secret` will nonetheless provide all required qualities.
+ * @pre
+ *   - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
+ *   - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
  *
- * When customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
+ * Example code:
+ * @code{.c}
+ *    #include <stdio.h>
+ *    #include <stdlib.h>
+ *    #include <string.h>
+ *    #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ *    #include "xxhash.h"
+ *    // Hashes argv[2] using the entropy from argv[1].
+ *    int main(int argc, char* argv[])
+ *    {
+ *        char secret[XXH3_SECRET_SIZE_MIN];
+ *        if (argv != 3) { return 1; }
+ *        XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
+ *        XXH64_hash_t h = XXH3_64bits_withSecret(
+ *             argv[2], strlen(argv[2]),
+ *             secret, sizeof(secret)
+ *        );
+ *        printf("%016llx\n", (unsigned long long) h);
+ *    }
+ * @endcode
  */
-XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize);
+XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
 
-
-/*
- * XXH3_generateSecret_fromSeed():
- *
- * Generate the same secret as the _withSeed() variants.
+/*!
+ * @brief Generate the same secret as the _withSeed() variants.
  *
- * The resulting secret has a length of XXH3_SECRET_DEFAULT_SIZE (necessarily).
- * @secretBuffer must be already allocated, of size at least XXH3_SECRET_DEFAULT_SIZE bytes.
+ * @param secretBuffer A writable buffer of @ref XXH3_SECRET_SIZE_MIN bytes
+ * @param seed         The 64-bit seed to alter the hash result predictably.
  *
  * The generated secret can be used in combination with
  *`*_withSecret()` and `_withSecretandSeed()` variants.
- * This generator is notably useful in combination with `_withSecretandSeed()`,
- * as a way to emulate a faster `_withSeed()` variant.
+ *
+ * Example C++ `std::string` hash class:
+ * @code{.cpp}
+ *    #include <string>
+ *    #define XXH_STATIC_LINKING_ONLY // expose unstable API
+ *    #include "xxhash.h"
+ *    // Slow, seeds each time
+ *    class HashSlow {
+ *        XXH64_hash_t seed;
+ *    public:
+ *        HashSlow(XXH64_hash_t s) : seed{s} {}
+ *        size_t operator()(const std::string& x) const {
+ *            return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
+ *        }
+ *    };
+ *    // Fast, caches the seeded secret for future uses.
+ *    class HashFast {
+ *        unsigned char secret[XXH3_SECRET_SIZE_MIN];
+ *    public:
+ *        HashFast(XXH64_hash_t s) {
+ *            XXH3_generateSecret_fromSeed(secret, seed);
+ *        }
+ *        size_t operator()(const std::string& x) const {
+ *            return size_t{
+ *                XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
+ *            };
+ *        }
+ *    };
+ * @endcode
  */
-XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed);
+XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
 
-/*
- * *_withSecretandSeed() :
+/*!
+ * @brief Calculates 64/128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param data       The block of data to be hashed, at least @p len bytes in size.
+ * @param len        The length of @p data, in bytes.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed       The 64-bit seed to alter the hash result predictably.
+ *
  * These variants generate hash values using either
- * @seed for "short" keys (< XXH3_MIDSIZE_MAX = 240 bytes)
- * or @secret for "large" keys (>= XXH3_MIDSIZE_MAX).
+ * @p seed for "short" keys (< @ref XXH3_MIDSIZE_MAX = 240 bytes)
+ * or @p secret for "large" keys (>= @ref XXH3_MIDSIZE_MAX).
  *
  * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
  * `_withSeed()` has to generate the secret on the fly for "large" keys.
@@ -1175,7 +1890,7 @@ XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_
  * which requires more instructions than _withSeed() variants.
  * Therefore, _withSecretandSeed variant combines the best of both worlds.
  *
- * When @secret has been generated by XXH3_generateSecret_fromSeed(),
+ * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
  * this variant produces *exactly* the same results as `_withSeed()` variant,
  * hence offering only a pure speed benefit on "large" input,
  * by skipping the need to regenerate the secret for every large input.
@@ -1184,33 +1899,71 @@ XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_
  * for example with XXH3_64bits(), which then becomes the seed,
  * and then employ both the seed and the secret in _withSecretandSeed().
  * On top of speed, an added benefit is that each bit in the secret
- * has a 50% chance to swap each bit in the output,
- * via its impact to the seed.
+ * has a 50% chance to swap each bit in the output, via its impact to the seed.
+ *
  * This is not guaranteed when using the secret directly in "small data" scenarios,
  * because only portions of the secret are employed for small data.
  */
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecretandSeed(const void* data, size_t len,
-                              const void* secret, size_t secretSize,
+XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
+                              XXH_NOESCAPE const void* secret, size_t secretSize,
                               XXH64_hash_t seed);
-
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecretandSeed(const void* data, size_t len,
-                               const void* secret, size_t secretSize,
+/*!
+ * @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
+ *
+ * @param input      The block of data to be hashed, at least @p len bytes in size.
+ * @param length     The length of @p data, in bytes.
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64     The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
+XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
+                               XXH_NOESCAPE const void* secret, size_t secretSize,
                                XXH64_hash_t seed64);
-
+#ifndef XXH_NO_STREAM
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr   A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64     The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
-                                    const void* secret, size_t secretSize,
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+                                    XXH_NOESCAPE const void* secret, size_t secretSize,
                                     XXH64_hash_t seed64);
-
+/*!
+ * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
+ *
+ * @param statePtr   A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ * @param secret     The secret data.
+ * @param secretSize The length of @p secret, in bytes.
+ * @param seed64     The 64-bit seed to alter the hash result predictably.
+ *
+ * @return @ref XXH_OK on success.
+ * @return @ref XXH_ERROR on failure.
+ *
+ * @see XXH3_64bits_withSecretandSeed()
+ */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
-                                     const void* secret, size_t secretSize,
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
+                                     XXH_NOESCAPE const void* secret, size_t secretSize,
                                      XXH64_hash_t seed64);
+#endif /* !XXH_NO_STREAM */
 
-
-#endif  /* XXH_NO_XXH3 */
+#endif  /* !XXH_NO_XXH3 */
 #endif  /* XXH_NO_LONG_LONG */
 #if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
 #  define XXH_IMPLEMENTATION
@@ -1264,7 +2017,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
 /*!
  * @brief Define this to disable 64-bit code.
  *
- * Useful if only using the @ref xxh32_family and you have a strict C90 compiler.
+ * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
  */
 #  define XXH_NO_LONG_LONG
 #  undef XXH_NO_LONG_LONG /* don't actually */
@@ -1287,7 +2040,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  *     Use `memcpy()`. Safe and portable. Note that most modern compilers will
  *     eliminate the function call and treat it as an unaligned access.
  *
- *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((packed))`
+ *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
  *   @par
  *     Depends on compiler extensions and is therefore not portable.
  *     This method is safe _if_ your compiler supports it,
@@ -1307,7 +2060,7 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  *     inline small `memcpy()` calls, and it might also be faster on big-endian
  *     systems which lack a native byteswap instruction. However, some compilers
  *     will emit literal byteshifts even if the target supports unaligned access.
- *  .
+ *
  *
  * @warning
  *   Methods 1 and 2 rely on implementation-defined behavior. Use these with
@@ -1321,6 +2074,34 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
 #  define XXH_FORCE_MEMORY_ACCESS 0
 
 /*!
+ * @def XXH_SIZE_OPT
+ * @brief Controls how much xxHash optimizes for size.
+ *
+ * xxHash, when compiled, tends to result in a rather large binary size. This
+ * is mostly due to heavy usage to forced inlining and constant folding of the
+ * @ref XXH3_family to increase performance.
+ *
+ * However, some developers prefer size over speed. This option can
+ * significantly reduce the size of the generated code. When using the `-Os`
+ * or `-Oz` options on GCC or Clang, this is defined to 1 by default,
+ * otherwise it is defined to 0.
+ *
+ * Most of these size optimizations can be controlled manually.
+ *
+ * This is a number from 0-2.
+ *  - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
+ *    comes first.
+ *  - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
+ *    conservative and disables hacks that increase code size. It implies the
+ *    options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
+ *    and @ref XXH3_NEON_LANES == 8 if they are not already defined.
+ *  - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
+ *    Performance may cry. For example, the single shot functions just use the
+ *    streaming API.
+ */
+#  define XXH_SIZE_OPT 0
+
+/*!
  * @def XXH_FORCE_ALIGN_CHECK
  * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
  * and XXH64() only).
@@ -1341,9 +2122,11 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  *
  * In these cases, the alignment check can be removed by setting this macro to 0.
  * Then the code will always use unaligned memory access.
- * Align check is automatically disabled on x86, x64 & arm64,
+ * Align check is automatically disabled on x86, x64, ARM64, and some ARM chips
  * which are platforms known to offer good unaligned memory accesses performance.
  *
+ * It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
+ *
  * This option does not affect XXH3 (only XXH32 and XXH64).
  */
 #  define XXH_FORCE_ALIGN_CHECK 0
@@ -1365,12 +2148,29 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
  * compiler full control on whether to inline or not.
  *
- * When not optimizing (-O0), optimizing for size (-Os, -Oz), or using
- * -fno-inline with GCC or Clang, this will automatically be defined.
+ * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
+ * @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
  */
 #  define XXH_NO_INLINE_HINTS 0
 
 /*!
+ * @def XXH3_INLINE_SECRET
+ * @brief Determines whether to inline the XXH3 withSecret code.
+ *
+ * When the secret size is known, the compiler can improve the performance
+ * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret().
+ *
+ * However, if the secret size is not known, it doesn't have any benefit. This
+ * happens when xxHash is compiled into a global symbol. Therefore, if
+ * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0.
+ *
+ * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers
+ * that are *sometimes* force inline on -Og, and it is impossible to automatically
+ * detect this optimization level.
+ */
+#  define XXH3_INLINE_SECRET 0
+
+/*!
  * @def XXH32_ENDJMP
  * @brief Whether to use a jump for `XXH32_finalize`.
  *
@@ -1391,34 +2191,45 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
  */
 #  define XXH_OLD_NAMES
 #  undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
+
+/*!
+ * @def XXH_NO_STREAM
+ * @brief Disables the streaming API.
+ *
+ * When xxHash is not inlined and the streaming functions are not used, disabling
+ * the streaming functions can improve code size significantly, especially with
+ * the @ref XXH3_family which tends to make constant folded copies of itself.
+ */
+#  define XXH_NO_STREAM
+#  undef XXH_NO_STREAM /* don't actually */
 #endif /* XXH_DOXYGEN */
 /*!
  * @}
  */
 
 #ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-   /* prefer __packed__ structures (method 1) for gcc on armv7+ and mips */
-#  if !defined(__clang__) && \
-( \
-    (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
-    ( \
-        defined(__GNUC__) && ( \
-            (defined(__ARM_ARCH) && __ARM_ARCH >= 7) || \
-            ( \
-                defined(__mips__) && \
-                (__mips <= 5 || __mips_isa_rev < 6) && \
-                (!defined(__mips16) || defined(__mips_mips16e2)) \
-            ) \
-        ) \
-    ) \
-)
+   /* prefer __packed__ structures (method 1) for GCC
+    * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
+    * which for some reason does unaligned loads. */
+#  if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
 #    define XXH_FORCE_MEMORY_ACCESS 1
 #  endif
 #endif
 
+#ifndef XXH_SIZE_OPT
+   /* default to 1 for -Os or -Oz */
+#  if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
+#    define XXH_SIZE_OPT 1
+#  else
+#    define XXH_SIZE_OPT 0
+#  endif
+#endif
+
 #ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
-#  if defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) \
-   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64) /* visual */
+   /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
+#  if XXH_SIZE_OPT >= 1 || \
+      defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
+   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64)    || defined(_M_ARM) /* visual */
 #    define XXH_FORCE_ALIGN_CHECK 0
 #  else
 #    define XXH_FORCE_ALIGN_CHECK 1
@@ -1426,14 +2237,22 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
 #endif
 
 #ifndef XXH_NO_INLINE_HINTS
-#  if defined(__OPTIMIZE_SIZE__) /* -Os, -Oz */ \
-   || defined(__NO_INLINE__)     /* -O0, -fno-inline */
+#  if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__)  /* -O0, -fno-inline */
 #    define XXH_NO_INLINE_HINTS 1
 #  else
 #    define XXH_NO_INLINE_HINTS 0
 #  endif
 #endif
 
+#ifndef XXH3_INLINE_SECRET
+#  if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
+     || !defined(XXH_INLINE_ALL)
+#    define XXH3_INLINE_SECRET 0
+#  else
+#    define XXH3_INLINE_SECRET 1
+#  endif
+#endif
+
 #ifndef XXH32_ENDJMP
 /* generally preferable for performance */
 #  define XXH32_ENDJMP 0
@@ -1448,13 +2267,56 @@ XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr,
 /* *************************************
 *  Includes & Memory related functions
 ***************************************/
-/* Modify the local functions below should you wish to use some other memory routines */
-/* for ZSTD_malloc(), ZSTD_free() */
-#define ZSTD_DEPS_NEED_MALLOC
-#include "zstd_deps.h"  /* size_t, ZSTD_malloc, ZSTD_free, ZSTD_memcpy */
-static void* XXH_malloc(size_t s) { return ZSTD_malloc(s); }
-static void  XXH_free  (void* p)  { ZSTD_free(p); }
-static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_memcpy(dest,src,size); }
+#if defined(XXH_NO_STREAM)
+/* nothing */
+#elif defined(XXH_NO_STDLIB)
+
+/* When requesting to disable any mention of stdlib,
+ * the library loses the ability to invoked malloc / free.
+ * In practice, it means that functions like `XXH*_createState()`
+ * will always fail, and return NULL.
+ * This flag is useful in situations where
+ * xxhash.h is integrated into some kernel, embedded or limited environment
+ * without access to dynamic allocation.
+ */
+
+static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
+static void XXH_free(void* p) { (void)p; }
+
+#else
+
+/*
+ * Modify the local functions below should you wish to use
+ * different memory routines for malloc() and free()
+ */
+#include <stdlib.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than malloc().
+ */
+static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than free().
+ */
+static void XXH_free(void* p) { free(p); }
+
+#endif  /* XXH_NO_STDLIB */
+
+#include <string.h>
+
+/*!
+ * @internal
+ * @brief Modify this function to use a different routine than memcpy().
+ */
+static void* XXH_memcpy(void* dest, const void* src, size_t size)
+{
+    return memcpy(dest,src,size);
+}
+
+#include <limits.h>   /* ULLONG_MAX */
 
 
 /* *************************************
@@ -1487,6 +2349,11 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_
 #  define XXH_NO_INLINE static
 #endif
 
+#if XXH3_INLINE_SECRET
+#  define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
+#else
+#  define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
+#endif
 
 
 /* *************************************
@@ -1512,14 +2379,17 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_
 #  include <assert.h>   /* note: can still be disabled with NDEBUG */
 #  define XXH_ASSERT(c)   assert(c)
 #else
-#  define XXH_ASSERT(c)   ((void)0)
+#  if defined(__INTEL_COMPILER)
+#    define XXH_ASSERT(c)   XXH_ASSUME((unsigned char) (c))
+#  else
+#    define XXH_ASSERT(c)   XXH_ASSUME(c)
+#  endif
 #endif
 
 /* note: use after variable declarations */
 #ifndef XXH_STATIC_ASSERT
 #  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)    /* C11 */
-#    include <assert.h>
-#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
+#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
 #  elif defined(__cplusplus) && (__cplusplus >= 201103L)            /* C++11 */
 #    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
 #  else
@@ -1534,7 +2404,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_
  * @brief Used to prevent unwanted optimizations for @p var.
  *
  * It uses an empty GCC inline assembly statement with a register constraint
- * which forces @p var into a general purpose register (e.g. eax, ebx, ecx
+ * which forces @p var into a general purpose register (eg eax, ebx, ecx
  * on x86) and marks it as modified.
  *
  * This is used in a few places to avoid unwanted autovectorization (e.g.
@@ -1545,18 +2415,30 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_
  * XXH3_initCustomSecret_scalar().
  */
 #if defined(__GNUC__) || defined(__clang__)
-#  define XXH_COMPILER_GUARD(var) __asm__ __volatile__("" : "+r" (var))
+#  define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
 #else
 #  define XXH_COMPILER_GUARD(var) ((void)0)
 #endif
 
+/* Specifically for NEON vectors which use the "w" constraint, on
+ * Clang. */
+#if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__)
+#  define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var))
+#else
+#  define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0)
+#endif
+
 /* *************************************
 *  Basic Types
 ***************************************/
 #if !defined (__VMS) \
  && (defined (__cplusplus) \
  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-# include <stdint.h>
+# ifdef _AIX
+#   include <inttypes.h>
+# else
+#   include <stdint.h>
+# endif
   typedef uint8_t xxh_u8;
 #else
   typedef unsigned char xxh_u8;
@@ -1564,6 +2446,7 @@ static void* XXH_memcpy(void* dest, const void* src, size_t size) { return ZSTD_
 typedef XXH32_hash_t xxh_u32;
 
 #ifdef XXH_OLD_NAMES
+#  warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly"
 #  define BYTE xxh_u8
 #  define U8   xxh_u8
 #  define U32  xxh_u32
@@ -1637,18 +2520,19 @@ static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr;
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
 
 /*
- * __pack instructions are safer but compiler specific, hence potentially
- * problematic for some compilers.
- *
- * Currently only defined for GCC and ICC.
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
  */
 #ifdef XXH_OLD_NAMES
 typedef union { xxh_u32 u32; } __attribute__((packed)) unalign;
 #endif
 static xxh_u32 XXH_read32(const void* ptr)
 {
-    typedef union { xxh_u32 u32; } __attribute__((packed)) xxh_unalign;
-    return ((const xxh_unalign*)ptr)->u32;
+    typedef __attribute__((aligned(1))) xxh_u32 xxh_unalign32;
+    return *((const xxh_unalign32*)ptr);
 }
 
 #else
@@ -1731,6 +2615,51 @@ static int XXH_isLittleEndian(void)
 #  define XXH_HAS_BUILTIN(x) 0
 #endif
 
+
+
+/*
+ * C23 and future versions have standard "unreachable()".
+ * Once it has been implemented reliably we can add it as an
+ * additional case:
+ *
+ * ```
+ * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
+ * #  include <stddef.h>
+ * #  ifdef unreachable
+ * #    define XXH_UNREACHABLE() unreachable()
+ * #  endif
+ * #endif
+ * ```
+ *
+ * Note C++23 also has std::unreachable() which can be detected
+ * as follows:
+ * ```
+ * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
+ * #  include <utility>
+ * #  define XXH_UNREACHABLE() std::unreachable()
+ * #endif
+ * ```
+ * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
+ * We don't use that as including `<utility>` in `extern "C"` blocks
+ * doesn't work on GCC12
+ */
+
+#if XXH_HAS_BUILTIN(__builtin_unreachable)
+#  define XXH_UNREACHABLE() __builtin_unreachable()
+
+#elif defined(_MSC_VER)
+#  define XXH_UNREACHABLE() __assume(0)
+
+#else
+#  define XXH_UNREACHABLE()
+#endif
+
+#if XXH_HAS_BUILTIN(__builtin_assume)
+#  define XXH_ASSUME(c) __builtin_assume(c)
+#else
+#  define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
+#endif
+
 /*!
  * @internal
  * @def XXH_rotl32(x,r)
@@ -1853,8 +2782,10 @@ XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
 *********************************************************************/
 /*!
  * @}
- * @defgroup xxh32_impl XXH32 implementation
+ * @defgroup XXH32_impl XXH32 implementation
  * @ingroup impl
+ *
+ * Details on the XXH32 implementation.
  * @{
  */
  /* #define instead of static const, to be used as initializers */
@@ -1888,7 +2819,7 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
     acc += input * XXH_PRIME32_2;
     acc  = XXH_rotl32(acc, 13);
     acc *= XXH_PRIME32_1;
-#if (defined(__SSE4_1__) || defined(__aarch64__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+#if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
     /*
      * UGLY HACK:
      * A compiler fence is the only thing that prevents GCC and Clang from
@@ -1918,9 +2849,12 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
      *   can load data, while v3 can multiply. SSE forces them to operate
      *   together.
      *
-     * This is also enabled on AArch64, as Clang autovectorizes it incorrectly
-     * and it is pointless writing a NEON implementation that is basically the
-     * same speed as scalar for XXH32.
+     * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
+     * the loop. NEON is only faster on the A53, and with the newer cores, it is less
+     * than half the speed.
+     *
+     * Additionally, this is used on WASM SIMD128 because it JITs to the same
+     * SIMD instructions and has the same issue.
      */
     XXH_COMPILER_GUARD(acc);
 #endif
@@ -1934,17 +2868,17 @@ static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
  * The final mix ensures that all input bits have a chance to impact any bit in
  * the output digest, resulting in an unbiased distribution.
  *
- * @param h32 The hash to avalanche.
+ * @param hash The hash to avalanche.
  * @return The avalanched hash.
  */
-static xxh_u32 XXH32_avalanche(xxh_u32 h32)
+static xxh_u32 XXH32_avalanche(xxh_u32 hash)
 {
-    h32 ^= h32 >> 15;
-    h32 *= XXH_PRIME32_2;
-    h32 ^= h32 >> 13;
-    h32 *= XXH_PRIME32_3;
-    h32 ^= h32 >> 16;
-    return(h32);
+    hash ^= hash >> 15;
+    hash *= XXH_PRIME32_2;
+    hash ^= hash >> 13;
+    hash *= XXH_PRIME32_3;
+    hash ^= hash >> 16;
+    return hash;
 }
 
 #define XXH_get32bits(p) XXH_readLE32_align(p, align)
@@ -1957,24 +2891,25 @@ static xxh_u32 XXH32_avalanche(xxh_u32 h32)
  * This final stage will digest them to ensure that all input bytes are present
  * in the final mix.
  *
- * @param h32 The hash to finalize.
+ * @param hash The hash to finalize.
  * @param ptr The pointer to the remaining input.
  * @param len The remaining length, modulo 16.
  * @param align Whether @p ptr is aligned.
  * @return The finalized hash.
+ * @see XXH64_finalize().
  */
-static xxh_u32
-XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
+static XXH_PUREF xxh_u32
+XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
 {
-#define XXH_PROCESS1 do {                           \
-    h32 += (*ptr++) * XXH_PRIME32_5;                \
-    h32 = XXH_rotl32(h32, 11) * XXH_PRIME32_1;      \
+#define XXH_PROCESS1 do {                             \
+    hash += (*ptr++) * XXH_PRIME32_5;                 \
+    hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1;      \
 } while (0)
 
-#define XXH_PROCESS4 do {                           \
-    h32 += XXH_get32bits(ptr) * XXH_PRIME32_3;      \
-    ptr += 4;                                   \
-    h32  = XXH_rotl32(h32, 17) * XXH_PRIME32_4;     \
+#define XXH_PROCESS4 do {                             \
+    hash += XXH_get32bits(ptr) * XXH_PRIME32_3;       \
+    ptr += 4;                                         \
+    hash  = XXH_rotl32(hash, 17) * XXH_PRIME32_4;     \
 } while (0)
 
     if (ptr==NULL) XXH_ASSERT(len == 0);
@@ -1990,49 +2925,49 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
             XXH_PROCESS1;
             --len;
         }
-        return XXH32_avalanche(h32);
+        return XXH32_avalanche(hash);
     } else {
          switch(len&15) /* or switch(bEnd - p) */ {
            case 12:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 8:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 4:       XXH_PROCESS4;
-                         return XXH32_avalanche(h32);
+                         return XXH32_avalanche(hash);
 
            case 13:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 9:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 5:       XXH_PROCESS4;
                          XXH_PROCESS1;
-                         return XXH32_avalanche(h32);
+                         return XXH32_avalanche(hash);
 
            case 14:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 10:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 6:       XXH_PROCESS4;
                          XXH_PROCESS1;
                          XXH_PROCESS1;
-                         return XXH32_avalanche(h32);
+                         return XXH32_avalanche(hash);
 
            case 15:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 11:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 7:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 3:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 2:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
+                         XXH_FALLTHROUGH;  /* fallthrough */
            case 1:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;
-           case 0:       return XXH32_avalanche(h32);
+                         XXH_FALLTHROUGH;  /* fallthrough */
+           case 0:       return XXH32_avalanche(hash);
         }
         XXH_ASSERT(0);
-        return h32;   /* reaching this point is deemed impossible */
+        return hash;   /* reaching this point is deemed impossible */
     }
 }
 
@@ -2052,7 +2987,7 @@ XXH32_finalize(xxh_u32 h32, const xxh_u8* ptr, size_t len, XXH_alignment align)
  * @param align Whether @p input is aligned.
  * @return The calculated hash.
  */
-XXH_FORCE_INLINE xxh_u32
+XXH_FORCE_INLINE XXH_PUREF xxh_u32
 XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
 {
     xxh_u32 h32;
@@ -2085,10 +3020,10 @@ XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment
     return XXH32_finalize(h32, input, len&15, align);
 }
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
 {
-#if 0
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
     XXH32_state_t state;
     XXH32_reset(&state, seed);
@@ -2107,27 +3042,26 @@ XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t s
 
 
 /*******   Hash streaming   *******/
-/*!
- * @ingroup xxh32_family
- */
+#ifndef XXH_NO_STREAM
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
 {
     return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
 }
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
 {
     XXH_free(statePtr);
     return XXH_OK;
 }
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
 {
     XXH_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
 {
     XXH_ASSERT(statePtr != NULL);
@@ -2140,7 +3074,7 @@ XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t s
 }
 
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH_errorcode
 XXH32_update(XXH32_state_t* state, const void* input, size_t len)
 {
@@ -2195,7 +3129,7 @@ XXH32_update(XXH32_state_t* state, const void* input, size_t len)
 }
 
 
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
 {
     xxh_u32 h32;
@@ -2213,31 +3147,18 @@ XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
 
     return XXH32_finalize(h32, (const xxh_u8*)state->mem32, state->memsize, XXH_aligned);
 }
-
+#endif /* !XXH_NO_STREAM */
 
 /*******   Canonical representation   *******/
 
-/*!
- * @ingroup xxh32_family
- * The default return values from XXH functions are unsigned 32 and 64 bit
- * integers.
- *
- * The canonical representation uses big endian convention, the same convention
- * as human-readable numbers (large digits first).
- *
- * This way, hash values can be written into a file or buffer, remaining
- * comparable across different systems.
- *
- * The following functions allow transformation of hash values to and from their
- * canonical format.
- */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
 {
-    /* XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t)); */
+    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
     XXH_memcpy(dst, &hash, sizeof(*dst));
 }
-/*! @ingroup xxh32_family */
+/*! @ingroup XXH32_family */
 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
 {
     return XXH_readBE32(src);
@@ -2278,18 +3199,19 @@ static xxh_u64 XXH_read64(const void* memPtr)
 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
 
 /*
- * __pack instructions are safer, but compiler specific, hence potentially
- * problematic for some compilers.
- *
- * Currently only defined for GCC and ICC.
+ * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
+ * documentation claimed that it only increased the alignment, but actually it
+ * can decrease it on gcc, clang, and icc:
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
+ * https://gcc.godbolt.org/z/xYez1j67Y.
  */
 #ifdef XXH_OLD_NAMES
 typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) unalign64;
 #endif
 static xxh_u64 XXH_read64(const void* ptr)
 {
-    typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((packed)) xxh_unalign64;
-    return ((const xxh_unalign64*)ptr)->u64;
+    typedef __attribute__((aligned(1))) xxh_u64 xxh_unalign64;
+    return *((const xxh_unalign64*)ptr);
 }
 
 #else
@@ -2380,8 +3302,10 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
 /*******   xxh64   *******/
 /*!
  * @}
- * @defgroup xxh64_impl XXH64 implementation
+ * @defgroup XXH64_impl XXH64 implementation
  * @ingroup impl
+ *
+ * Details on the XXH64 implementation.
  * @{
  */
 /* #define rather that static const, to be used as initializers */
@@ -2399,11 +3323,29 @@ XXH_readLE64_align(const void* ptr, XXH_alignment align)
 #  define PRIME64_5 XXH_PRIME64_5
 #endif
 
+/*! @copydoc XXH32_round */
 static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
 {
     acc += input * XXH_PRIME64_2;
     acc  = XXH_rotl64(acc, 31);
     acc *= XXH_PRIME64_1;
+#if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
+    /*
+     * DISABLE AUTOVECTORIZATION:
+     * A compiler fence is used to prevent GCC and Clang from
+     * autovectorizing the XXH64 loop (pragmas and attributes don't work for some
+     * reason) without globally disabling AVX512.
+     *
+     * Autovectorization of XXH64 tends to be detrimental,
+     * though the exact outcome may change depending on exact cpu and compiler version.
+     * For information, it has been reported as detrimental for Skylake-X,
+     * but possibly beneficial for Zen4.
+     *
+     * The default is to disable auto-vectorization,
+     * but you can select to enable it instead using `XXH_ENABLE_AUTOVECTORIZE` build variable.
+     */
+    XXH_COMPILER_GUARD(acc);
+#endif
     return acc;
 }
 
@@ -2415,43 +3357,59 @@ static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
     return acc;
 }
 
-static xxh_u64 XXH64_avalanche(xxh_u64 h64)
+/*! @copydoc XXH32_avalanche */
+static xxh_u64 XXH64_avalanche(xxh_u64 hash)
 {
-    h64 ^= h64 >> 33;
-    h64 *= XXH_PRIME64_2;
-    h64 ^= h64 >> 29;
-    h64 *= XXH_PRIME64_3;
-    h64 ^= h64 >> 32;
-    return h64;
+    hash ^= hash >> 33;
+    hash *= XXH_PRIME64_2;
+    hash ^= hash >> 29;
+    hash *= XXH_PRIME64_3;
+    hash ^= hash >> 32;
+    return hash;
 }
 
 
 #define XXH_get64bits(p) XXH_readLE64_align(p, align)
 
-static xxh_u64
-XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
+/*!
+ * @internal
+ * @brief Processes the last 0-31 bytes of @p ptr.
+ *
+ * There may be up to 31 bytes remaining to consume from the input.
+ * This final stage will digest them to ensure that all input bytes are present
+ * in the final mix.
+ *
+ * @param hash The hash to finalize.
+ * @param ptr The pointer to the remaining input.
+ * @param len The remaining length, modulo 32.
+ * @param align Whether @p ptr is aligned.
+ * @return The finalized hash
+ * @see XXH32_finalize().
+ */
+static XXH_PUREF xxh_u64
+XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
 {
     if (ptr==NULL) XXH_ASSERT(len == 0);
     len &= 31;
     while (len >= 8) {
         xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
         ptr += 8;
-        h64 ^= k1;
-        h64  = XXH_rotl64(h64,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
+        hash ^= k1;
+        hash  = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
         len -= 8;
     }
     if (len >= 4) {
-        h64 ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
+        hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
         ptr += 4;
-        h64 = XXH_rotl64(h64, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
+        hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
         len -= 4;
     }
     while (len > 0) {
-        h64 ^= (*ptr++) * XXH_PRIME64_5;
-        h64 = XXH_rotl64(h64, 11) * XXH_PRIME64_1;
+        hash ^= (*ptr++) * XXH_PRIME64_5;
+        hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
         --len;
     }
-    return  XXH64_avalanche(h64);
+    return  XXH64_avalanche(hash);
 }
 
 #ifdef XXH_OLD_NAMES
@@ -2464,7 +3422,15 @@ XXH64_finalize(xxh_u64 h64, const xxh_u8* ptr, size_t len, XXH_alignment align)
 #  undef XXH_PROCESS8_64
 #endif
 
-XXH_FORCE_INLINE xxh_u64
+/*!
+ * @internal
+ * @brief The implementation for @ref XXH64().
+ *
+ * @param input , len , seed Directly passed from @ref XXH64().
+ * @param align Whether @p input is aligned.
+ * @return The calculated hash.
+ */
+XXH_FORCE_INLINE XXH_PUREF xxh_u64
 XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
 {
     xxh_u64 h64;
@@ -2501,10 +3467,10 @@ XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment
 }
 
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t seed)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
 {
-#if 0
+#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
     /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
     XXH64_state_t state;
     XXH64_reset(&state, seed);
@@ -2522,27 +3488,27 @@ XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t len, XXH64_hash_t s
 }
 
 /*******   Hash Streaming   *******/
-
-/*! @ingroup xxh64_family*/
+#ifndef XXH_NO_STREAM
+/*! @ingroup XXH64_family*/
 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
 {
     return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
 }
-/*! @ingroup xxh64_family */
+/*! @ingroup XXH64_family */
 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
 {
     XXH_free(statePtr);
     return XXH_OK;
 }
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
 {
     XXH_memcpy(dstState, srcState, sizeof(*dstState));
 }
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t seed)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
 {
     XXH_ASSERT(statePtr != NULL);
     memset(statePtr, 0, sizeof(*statePtr));
@@ -2553,9 +3519,9 @@ XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, XXH64_hash_t s
     return XXH_OK;
 }
 
-/*! @ingroup xxh64_family */
+/*! @ingroup XXH64_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH64_update (XXH64_state_t* state, const void* input, size_t len)
+XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
 {
     if (input==NULL) {
         XXH_ASSERT(len == 0);
@@ -2605,8 +3571,8 @@ XXH64_update (XXH64_state_t* state, const void* input, size_t len)
 }
 
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
 {
     xxh_u64 h64;
 
@@ -2624,20 +3590,20 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_digest(const XXH64_state_t* state)
 
     return XXH64_finalize(h64, (const xxh_u8*)state->mem64, (size_t)state->total_len, XXH_aligned);
 }
-
+#endif /* !XXH_NO_STREAM */
 
 /******* Canonical representation   *******/
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
 {
-    /* XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t)); */
+    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
     XXH_memcpy(dst, &hash, sizeof(*dst));
 }
 
-/*! @ingroup xxh64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+/*! @ingroup XXH64_family */
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
 {
     return XXH_readBE64(src);
 }
@@ -2650,7 +3616,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 ************************************************************************ */
 /*!
  * @}
- * @defgroup xxh3_impl XXH3 implementation
+ * @defgroup XXH3_impl XXH3 implementation
  * @ingroup impl
  * @{
  */
@@ -2658,11 +3624,19 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 /* ===   Compiler specifics   === */
 
 #if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
-#  define XXH_RESTRICT /* disable */
+#  define XXH_RESTRICT   /* disable */
 #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
 #  define XXH_RESTRICT   restrict
+#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
+   || (defined (__clang__)) \
+   || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
+   || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
+/*
+ * There are a LOT more compilers that recognize __restrict but this
+ * covers the major ones.
+ */
+#  define XXH_RESTRICT   __restrict
 #else
-/* Note: it might be useful to define __restrict or __restrict__ for some C++ compilers */
 #  define XXH_RESTRICT   /* disable */
 #endif
 
@@ -2676,10 +3650,26 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
 #    define XXH_unlikely(x) (x)
 #endif
 
+#ifndef XXH_HAS_INCLUDE
+#  ifdef __has_include
+/*
+ * Not defined as XXH_HAS_INCLUDE(x) (function-like) because
+ * this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion)
+ */
+#    define XXH_HAS_INCLUDE __has_include
+#  else
+#    define XXH_HAS_INCLUDE(x) 0
+#  endif
+#endif
+
 #if defined(__GNUC__) || defined(__clang__)
+#  if defined(__ARM_FEATURE_SVE)
+#    include <arm_sve.h>
+#  endif
 #  if defined(__ARM_NEON__) || defined(__ARM_NEON) \
-   || defined(__aarch64__)  || defined(_M_ARM) \
-   || defined(_M_ARM64)     || defined(_M_ARM64EC)
+   || (defined(_M_ARM) && _M_ARM >= 7) \
+   || defined(_M_ARM64) || defined(_M_ARM64EC) \
+   || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* WASM SIMD128 via SIMDe */
 #    define inline __inline__  /* circumvent a clang bug */
 #    include <arm_neon.h>
 #    undef inline
@@ -2790,7 +3780,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src
  * Note that these are actually implemented as macros.
  *
  * If this is not defined, it is detected automatically.
- * @ref XXH_X86DISPATCH overrides this.
+ * internal macro XXH_X86DISPATCH overrides this.
  */
 enum XXH_VECTOR_TYPE /* fake enum */ {
     XXH_SCALAR = 0,  /*!< Portable scalar version */
@@ -2802,8 +3792,13 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
                       */
     XXH_AVX2   = 2,  /*!< AVX2 for Haswell and Bulldozer */
     XXH_AVX512 = 3,  /*!< AVX512 for Skylake and Icelake */
-    XXH_NEON   = 4,  /*!< NEON for most ARMv7-A and all AArch64 */
+    XXH_NEON   = 4,  /*!<
+                       * NEON for most ARMv7-A, all AArch64, and WASM SIMD128
+                       * via the SIMDeverywhere polyfill provided with the
+                       * Emscripten SDK.
+                       */
     XXH_VSX    = 5,  /*!< VSX and ZVector for POWER8/z13 (64-bit) */
+    XXH_SVE    = 6,  /*!< SVE for some ARMv8-A and ARMv9-A */
 };
 /*!
  * @ingroup tuning
@@ -2825,12 +3820,16 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #  define XXH_AVX512 3
 #  define XXH_NEON   4
 #  define XXH_VSX    5
+#  define XXH_SVE    6
 #endif
 
 #ifndef XXH_VECTOR    /* can be defined on command line */
-#  if ( \
+#  if defined(__ARM_FEATURE_SVE)
+#    define XXH_VECTOR XXH_SVE
+#  elif ( \
         defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
      || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
+     || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* wasm simd128 via SIMDe */ \
    ) && ( \
         defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
@@ -2851,6 +3850,17 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #  endif
 #endif
 
+/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
+#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
+#  ifdef _MSC_VER
+#    pragma warning(once : 4606)
+#  else
+#    warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
+#  endif
+#  undef XXH_VECTOR
+#  define XXH_VECTOR XXH_SCALAR
+#endif
+
 /*
  * Controls the alignment of the accumulator,
  * for compatibility with aligned vector loads, which are usually faster.
@@ -2870,16 +3880,26 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
 #     define XXH_ACC_ALIGN 16
 #  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
 #     define XXH_ACC_ALIGN 64
+#  elif XXH_VECTOR == XXH_SVE   /* sve */
+#     define XXH_ACC_ALIGN 64
 #  endif
 #endif
 
 #if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
     || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
 #  define XXH_SEC_ALIGN XXH_ACC_ALIGN
+#elif XXH_VECTOR == XXH_SVE
+#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
 #else
 #  define XXH_SEC_ALIGN 8
 #endif
 
+#if defined(__GNUC__) || defined(__clang__)
+#  define XXH_ALIASING __attribute__((may_alias))
+#else
+#  define XXH_ALIASING /* nothing */
+#endif
+
 /*
  * UGLY HACK:
  * GCC usually generates the best code with -O3 for xxHash.
@@ -2903,153 +3923,126 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
  */
 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
   && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
 #  pragma GCC push_options
 #  pragma GCC optimize("-O2")
 #endif
 
-
 #if XXH_VECTOR == XXH_NEON
+
 /*
- * NEON's setup for vmlal_u32 is a little more complicated than it is on
- * SSE2, AVX2, and VSX.
- *
- * While PMULUDQ and VMULEUW both perform a mask, VMLAL.U32 performs an upcast.
- *
- * To do the same operation, the 128-bit 'Q' register needs to be split into
- * two 64-bit 'D' registers, performing this operation::
- *
- *   [                a                 |                 b                ]
- *            |              '---------. .--------'                |
- *            |                         x                          |
- *            |              .---------' '--------.                |
- *   [ a & 0xFFFFFFFF | b & 0xFFFFFFFF ],[    a >> 32     |     b >> 32    ]
- *
- * Due to significant changes in aarch64, the fastest method for aarch64 is
- * completely different than the fastest method for ARMv7-A.
+ * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
+ * optimizes out the entire hashLong loop because of the aliasing violation.
  *
- * ARMv7-A treats D registers as unions overlaying Q registers, so modifying
- * D11 will modify the high half of Q5. This is similar to how modifying AH
- * will only affect bits 8-15 of AX on x86.
- *
- * VZIP takes two registers, and puts even lanes in one register and odd lanes
- * in the other.
- *
- * On ARMv7-A, this strangely modifies both parameters in place instead of
- * taking the usual 3-operand form.
- *
- * Therefore, if we want to do this, we can simply use a D-form VZIP.32 on the
- * lower and upper halves of the Q register to end up with the high and low
- * halves where we want - all in one instruction.
- *
- *   vzip.32   d10, d11       @ d10 = { d10[0], d11[0] }; d11 = { d10[1], d11[1] }
- *
- * Unfortunately we need inline assembly for this: Instructions modifying two
- * registers at once is not possible in GCC or Clang's IR, and they have to
- * create a copy.
- *
- * aarch64 requires a different approach.
- *
- * In order to make it easier to write a decent compiler for aarch64, many
- * quirks were removed, such as conditional execution.
- *
- * NEON was also affected by this.
- *
- * aarch64 cannot access the high bits of a Q-form register, and writes to a
- * D-form register zero the high bits, similar to how writes to W-form scalar
- * registers (or DWORD registers on x86_64) work.
- *
- * The formerly free vget_high intrinsics now require a vext (with a few
- * exceptions)
- *
- * Additionally, VZIP was replaced by ZIP1 and ZIP2, which are the equivalent
- * of PUNPCKL* and PUNPCKH* in SSE, respectively, in order to only modify one
- * operand.
- *
- * The equivalent of the VZIP.32 on the lower and upper halves would be this
- * mess:
- *
- *   ext     v2.4s, v0.4s, v0.4s, #2 // v2 = { v0[2], v0[3], v0[0], v0[1] }
- *   zip1    v1.2s, v0.2s, v2.2s     // v1 = { v0[0], v2[0] }
- *   zip2    v0.2s, v0.2s, v1.2s     // v0 = { v0[1], v2[1] }
+ * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
+ * so the only option is to mark it as aliasing.
+ */
+typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
+
+/*!
+ * @internal
+ * @brief `vld1q_u64` but faster and alignment-safe.
  *
- * Instead, we use a literal downcast, vmovn_u64 (XTN), and vshrn_n_u64 (SHRN):
+ * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
+ * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
  *
- *   shrn    v1.2s, v0.2d, #32  // v1 = (uint32x2_t)(v0 >> 32);
- *   xtn     v0.2s, v0.2d       // v0 = (uint32x2_t)(v0 & 0xFFFFFFFF);
+ * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
+ * prohibits load-store optimizations. Therefore, a direct dereference is used.
  *
- * This is available on ARMv7-A, but is less efficient than a single VZIP.32.
+ * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
+ * unaligned load.
  */
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
+{
+    return *(xxh_aliasing_uint64x2_t const *)ptr;
+}
+#else
+XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
+{
+    return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
+}
+#endif
 
 /*!
- * Function-like macro:
- * void XXH_SPLIT_IN_PLACE(uint64x2_t &in, uint32x2_t &outLo, uint32x2_t &outHi)
- * {
- *     outLo = (uint32x2_t)(in & 0xFFFFFFFF);
- *     outHi = (uint32x2_t)(in >> 32);
- *     in = UNDEFINED;
- * }
+ * @internal
+ * @brief `vmlal_u32` on low and high halves of a vector.
+ *
+ * This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with
+ * inline assembly and were therefore incapable of merging the `vget_{low, high}_u32`
+ * with `vmlal_u32`.
  */
-# if !defined(XXH_NO_VZIP_HACK) /* define to disable */ \
-   && (defined(__GNUC__) || defined(__clang__)) \
-   && (defined(__arm__) || defined(__thumb__) || defined(_M_ARM))
-#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                              \
-    do {                                                                                    \
-      /* Undocumented GCC/Clang operand modifier: %e0 = lower D half, %f0 = upper D half */ \
-      /* https://github.com/gcc-mirror/gcc/blob/38cf91e5/gcc/config/arm/arm.c#L22486 */     \
-      /* https://github.com/llvm-mirror/llvm/blob/2c4ca683/lib/Target/ARM/ARMAsmPrinter.cpp#L399 */ \
-      __asm__("vzip.32  %e0, %f0" : "+w" (in));                                             \
-      (outLo) = vget_low_u32 (vreinterpretq_u32_u64(in));                                   \
-      (outHi) = vget_high_u32(vreinterpretq_u32_u64(in));                                   \
-   } while (0)
-# else
-#  define XXH_SPLIT_IN_PLACE(in, outLo, outHi)                                            \
-    do {                                                                                  \
-      (outLo) = vmovn_u64    (in);                                                        \
-      (outHi) = vshrn_n_u64  ((in), 32);                                                  \
-    } while (0)
-# endif
+#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    /* Inline assembly is the only way */
+    __asm__("umlal   %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
+    return acc;
+}
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    /* This intrinsic works as expected */
+    return vmlal_high_u32(acc, lhs, rhs);
+}
+#else
+/* Portable intrinsic versions */
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
+}
+/*! @copydoc XXH_vmlal_low_u32
+ * Assume the compiler converts this to vmlal_high_u32 on aarch64 */
+XXH_FORCE_INLINE uint64x2_t
+XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
+{
+    return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
+}
+#endif
 
 /*!
  * @ingroup tuning
  * @brief Controls the NEON to scalar ratio for XXH3
  *
- * On AArch64 when not optimizing for size, XXH3 will run 6 lanes using NEON and
- * 2 lanes on scalar by default.
+ * This can be set to 2, 4, 6, or 8.
  *
- * This can be set to 2, 4, 6, or 8. ARMv7 will default to all 8 NEON lanes, as the
- * emulated 64-bit arithmetic is too slow.
+ * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.
  *
- * Modern ARM CPUs are _very_ sensitive to how their pipelines are used.
+ * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those
+ * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU
+ * bandwidth.
  *
- * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but it can't
- * have more than 2 NEON (F0/F1) micro-ops. If you are only using NEON instructions,
- * you are only using 2/3 of the CPU bandwidth.
- *
- * This is even more noticeable on the more advanced cores like the A76 which
+ * This is even more noticeable on the more advanced cores like the Cortex-A76 which
  * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
  *
- * Therefore, @ref XXH3_NEON_LANES lanes will be processed using NEON, and the
- * remaining lanes will use scalar instructions. This improves the bandwidth
- * and also gives the integer pipelines something to do besides twiddling loop
- * counters and pointers.
+ * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
+ * and 2 scalar lanes, which is chosen by default.
+ *
+ * This does not apply to Apple processors or 32-bit processors, which run better with
+ * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
  *
  * This change benefits CPUs with large micro-op buffers without negatively affecting
- * other CPUs:
+ * most other CPUs:
  *
  *  | Chipset               | Dispatch type       | NEON only | 6:2 hybrid | Diff. |
  *  |:----------------------|:--------------------|----------:|-----------:|------:|
  *  | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s |  10.1 GB/s |  ~16% |
  *  | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |   5.3 GB/s |   ~5% |
  *  | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |   1.9 GB/s |    0% |
+ *  | Apple M1              | 4 NEON/8 micro-ops  | 37.3 GB/s |  36.1 GB/s |  ~-3% |
  *
  * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
  *
+ * When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes meaning
+ * it effectively becomes worse 4.
+ *
  * @see XXH3_accumulate_512_neon()
  */
 # ifndef XXH3_NEON_LANES
 #  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
-   && !defined(__OPTIMIZE_SIZE__)
+   && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
 #   define XXH3_NEON_LANES 6
 #  else
 #   define XXH3_NEON_LANES XXH_ACC_NB
@@ -3066,27 +4059,42 @@ enum XXH_VECTOR_TYPE /* fake enum */ {
  * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
  */
 #if XXH_VECTOR == XXH_VSX
+/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
+ * and `pixel`. This is a problem for obvious reasons.
+ *
+ * These keywords are unnecessary; the spec literally says they are
+ * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
+ * after including the header.
+ *
+ * We use pragma push_macro/pop_macro to keep the namespace clean. */
+#  pragma push_macro("bool")
+#  pragma push_macro("vector")
+#  pragma push_macro("pixel")
+/* silence potential macro redefined warnings */
+#  undef bool
+#  undef vector
+#  undef pixel
+
 #  if defined(__s390x__)
 #    include <s390intrin.h>
 #  else
-/* gcc's altivec.h can have the unwanted consequence to unconditionally
- * #define bool, vector, and pixel keywords,
- * with bad consequences for programs already using these keywords for other purposes.
- * The paragraph defining these macros is skipped when __APPLE_ALTIVEC__ is defined.
- * __APPLE_ALTIVEC__ is _generally_ defined automatically by the compiler,
- * but it seems that, in some cases, it isn't.
- * Force the build macro to be defined, so that keywords are not altered.
- */
-#    if defined(__GNUC__) && !defined(__APPLE_ALTIVEC__)
-#      define __APPLE_ALTIVEC__
-#    endif
 #    include <altivec.h>
 #  endif
 
+/* Restore the original macro values, if applicable. */
+#  pragma pop_macro("pixel")
+#  pragma pop_macro("vector")
+#  pragma pop_macro("bool")
+
 typedef __vector unsigned long long xxh_u64x2;
 typedef __vector unsigned char xxh_u8x16;
 typedef __vector unsigned xxh_u32x4;
 
+/*
+ * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
+ */
+typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
+
 # ifndef XXH_VSX_BE
 #  if defined(__BIG_ENDIAN__) \
   || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
@@ -3138,8 +4146,9 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
  /* s390x is always big endian, no issue on this platform */
 #  define XXH_vec_mulo vec_mulo
 #  define XXH_vec_mule vec_mule
-# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw)
+# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
 /* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
+ /* The IBM XL Compiler (which defined __clang__) only implements the vec_* operations */
 #  define XXH_vec_mulo __builtin_altivec_vmulouw
 #  define XXH_vec_mule __builtin_altivec_vmuleuw
 # else
@@ -3160,13 +4169,28 @@ XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
 # endif /* XXH_vec_mulo, XXH_vec_mule */
 #endif /* XXH_VECTOR == XXH_VSX */
 
+#if XXH_VECTOR == XXH_SVE
+#define ACCRND(acc, offset) \
+do { \
+    svuint64_t input_vec = svld1_u64(mask, xinput + offset);         \
+    svuint64_t secret_vec = svld1_u64(mask, xsecret + offset);       \
+    svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec);     \
+    svuint64_t swapped = svtbl_u64(input_vec, kSwap);                \
+    svuint64_t mixed_lo = svextw_u64_x(mask, mixed);                 \
+    svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32);            \
+    svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
+    acc = svadd_u64_x(mask, acc, mul);                               \
+} while (0)
+#endif /* XXH_VECTOR == XXH_SVE */
 
 /* prefetch
  * can be disabled, by declaring XXH_NO_PREFETCH build macro */
 #if defined(XXH_NO_PREFETCH)
 #  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
 #else
-#  if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
+#  if XXH_SIZE_OPT >= 1
+#    define XXH_PREFETCH(ptr) (void)(ptr)
+#  elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
 #    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
 #    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
 #  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
@@ -3203,6 +4227,8 @@ XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
     0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
 };
 
+static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL;  /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */
+static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL;  /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */
 
 #ifdef XXH_OLD_NAMES
 #  define kSecret XXH3_kSecret
@@ -3394,7 +4420,7 @@ XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
 }
 
 /*! Seems to produce slightly better code on GCC for some reason. */
-XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
+XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
 {
     XXH_ASSERT(0 <= shift && shift < 64);
     return v64 ^ (v64 >> shift);
@@ -3407,7 +4433,7 @@ XXH_FORCE_INLINE xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
 static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
 {
     h64 = XXH_xorshift64(h64, 37);
-    h64 *= 0x165667919E3779F9ULL;
+    h64 *= PRIME_MX1;
     h64 = XXH_xorshift64(h64, 32);
     return h64;
 }
@@ -3421,9 +4447,9 @@ static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
 {
     /* this mix is inspired by Pelle Evensen's rrmxmx */
     h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
-    h64 *= 0x9FB21C651E98DF25ULL;
+    h64 *= PRIME_MX2;
     h64 ^= (h64 >> 35) + len ;
-    h64 *= 0x9FB21C651E98DF25ULL;
+    h64 *= PRIME_MX2;
     return XXH_xorshift64(h64, 28);
 }
 
@@ -3461,7 +4487,7 @@ static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
  *
  * This adds an extra layer of strength for custom secrets.
  */
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -3483,7 +4509,7 @@ XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
     }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -3499,7 +4525,7 @@ XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_h
     }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -3516,7 +4542,7 @@ XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
     }
 }
 
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(len <= 16);
@@ -3586,7 +4612,7 @@ XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
 }
 
 /* For mid range keys, XXH3 uses a Mum-hash variant. */
-XXH_FORCE_INLINE XXH64_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                      XXH64_hash_t seed)
@@ -3595,6 +4621,14 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
     XXH_ASSERT(16 < len && len <= 128);
 
     {   xxh_u64 acc = len * XXH_PRIME64_1;
+#if XXH_SIZE_OPT >= 1
+        /* Smaller and cleaner, but slightly slower. */
+        unsigned int i = (unsigned int)(len - 1) / 32;
+        do {
+            acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
+            acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
+        } while (i-- != 0);
+#else
         if (len > 32) {
             if (len > 64) {
                 if (len > 96) {
@@ -3609,14 +4643,17 @@ XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
         }
         acc += XXH3_mix16B(input+0, secret+0, seed);
         acc += XXH3_mix16B(input+len-16, secret+16, seed);
-
+#endif
         return XXH3_avalanche(acc);
     }
 }
 
+/*!
+ * @brief Maximum size of "short" key in bytes.
+ */
 #define XXH3_MIDSIZE_MAX 240
 
-XXH_NO_INLINE XXH64_hash_t
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
 XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                       XXH64_hash_t seed)
@@ -3628,13 +4665,17 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
     #define XXH3_MIDSIZE_LASTOFFSET  17
 
     {   xxh_u64 acc = len * XXH_PRIME64_1;
-        int const nbRounds = (int)len / 16;
-        int i;
+        xxh_u64 acc_end;
+        unsigned int const nbRounds = (unsigned int)len / 16;
+        unsigned int i;
+        XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
         for (i=0; i<8; i++) {
             acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
         }
-        acc = XXH3_avalanche(acc);
+        /* last bytes */
+        acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
         XXH_ASSERT(nbRounds >= 8);
+        acc = XXH3_avalanche(acc);
 #if defined(__clang__)                                /* Clang */ \
     && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
     && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
@@ -3661,11 +4702,13 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
         #pragma clang loop vectorize(disable)
 #endif
         for (i=8 ; i < nbRounds; i++) {
-            acc += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
+            /*
+             * Prevents clang for unrolling the acc loop and interleaving with this one.
+             */
+            XXH_COMPILER_GUARD(acc);
+            acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
         }
-        /* last bytes */
-        acc += XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
-        return XXH3_avalanche(acc);
+        return XXH3_avalanche(acc + acc_end);
     }
 }
 
@@ -3681,6 +4724,47 @@ XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
 #  define ACC_NB XXH_ACC_NB
 #endif
 
+#ifndef XXH_PREFETCH_DIST
+#  ifdef __clang__
+#    define XXH_PREFETCH_DIST 320
+#  else
+#    if (XXH_VECTOR == XXH_AVX512)
+#      define XXH_PREFETCH_DIST 512
+#    else
+#      define XXH_PREFETCH_DIST 384
+#    endif
+#  endif  /* __clang__ */
+#endif  /* XXH_PREFETCH_DIST */
+
+/*
+ * These macros are to generate an XXH3_accumulate() function.
+ * The two arguments select the name suffix and target attribute.
+ *
+ * The name of this symbol is XXH3_accumulate_<name>() and it calls
+ * XXH3_accumulate_512_<name>().
+ *
+ * It may be useful to hand implement this function if the compiler fails to
+ * optimize the inline function.
+ */
+#define XXH3_ACCUMULATE_TEMPLATE(name)                      \
+void                                                        \
+XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc,           \
+                       const xxh_u8* XXH_RESTRICT input,    \
+                       const xxh_u8* XXH_RESTRICT secret,   \
+                       size_t nbStripes)                    \
+{                                                           \
+    size_t n;                                               \
+    for (n = 0; n < nbStripes; n++ ) {                      \
+        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;  \
+        XXH_PREFETCH(in + XXH_PREFETCH_DIST);               \
+        XXH3_accumulate_512_##name(                         \
+                 acc,                                       \
+                 in,                                        \
+                 secret + n*XXH_SECRET_CONSUME_RATE);       \
+    }                                                       \
+}
+
+
 XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
 {
     if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
@@ -3749,7 +4833,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
         /* data_key    = data_vec ^ key_vec; */
         __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
         /* data_key_lo = data_key >> 32; */
-        __m512i const data_key_lo = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+        __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
         /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
         __m512i const product     = _mm512_mul_epu32     (data_key, data_key_lo);
         /* xacc[0] += swap(data_vec); */
@@ -3759,6 +4843,7 @@ XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
         *xacc = _mm512_add_epi64(product, sum);
     }
 }
+XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
 
 /*
  * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
@@ -3792,13 +4877,12 @@ XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
         /* xacc[0] ^= (xacc[0] >> 47) */
         __m512i const acc_vec     = *xacc;
         __m512i const shifted     = _mm512_srli_epi64    (acc_vec, 47);
-        __m512i const data_vec    = _mm512_xor_si512     (acc_vec, shifted);
         /* xacc[0] ^= secret; */
         __m512i const key_vec     = _mm512_loadu_si512   (secret);
-        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
+        __m512i const data_key    = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
 
         /* xacc[0] *= XXH_PRIME32_1; */
-        __m512i const data_key_hi = _mm512_shuffle_epi32 (data_key, (_MM_PERM_ENUM)_MM_SHUFFLE(0, 3, 0, 1));
+        __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
         __m512i const prod_lo     = _mm512_mul_epu32     (data_key, prime32);
         __m512i const prod_hi     = _mm512_mul_epu32     (data_key_hi, prime32);
         *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
@@ -3813,7 +4897,8 @@ XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
     XXH_ASSERT(((size_t)customSecret & 63) == 0);
     (void)(&XXH_writeLE64);
     {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
-        __m512i const seed = _mm512_mask_set1_epi64(_mm512_set1_epi64((xxh_i64)seed64), 0xAA, (xxh_i64)(0U - seed64));
+        __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
+        __m512i const seed     = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
 
         const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);
               __m512i* const dest = (      __m512i*) customSecret;
@@ -3821,14 +4906,7 @@ XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
         XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
         XXH_ASSERT(((size_t)dest & 63) == 0);
         for (i=0; i < nbRounds; ++i) {
-            /* GCC has a bug, _mm512_stream_load_si512 accepts 'void*', not 'void const*',
-             * this will warn "discards 'const' qualifier". */
-            union {
-                const __m512i* cp;
-                void* p;
-            } remote_const_void;
-            remote_const_void.cp = src + i;
-            dest[i] = _mm512_add_epi64(_mm512_stream_load_si512(remote_const_void.p), seed);
+            dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
     }   }
 }
 
@@ -3864,7 +4942,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
             /* data_key    = data_vec ^ key_vec; */
             __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
             /* data_key_lo = data_key >> 32; */
-            __m256i const data_key_lo = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
             /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
             __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);
             /* xacc[i] += swap(data_vec); */
@@ -3874,6 +4952,7 @@ XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
             xacc[i] = _mm256_add_epi64(product, sum);
     }   }
 }
+XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
 
 XXH_FORCE_INLINE XXH_TARGET_AVX2 void
 XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
@@ -3896,7 +4975,7 @@ XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
             __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
 
             /* xacc[i] *= XXH_PRIME32_1; */
-            __m256i const data_key_hi = _mm256_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
+            __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
             __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);
             __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);
             xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
@@ -3928,12 +5007,12 @@ XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTR
         XXH_ASSERT(((size_t)dest & 31) == 0);
 
         /* GCC -O2 need unroll loop manually */
-        dest[0] = _mm256_add_epi64(_mm256_stream_load_si256(src+0), seed);
-        dest[1] = _mm256_add_epi64(_mm256_stream_load_si256(src+1), seed);
-        dest[2] = _mm256_add_epi64(_mm256_stream_load_si256(src+2), seed);
-        dest[3] = _mm256_add_epi64(_mm256_stream_load_si256(src+3), seed);
-        dest[4] = _mm256_add_epi64(_mm256_stream_load_si256(src+4), seed);
-        dest[5] = _mm256_add_epi64(_mm256_stream_load_si256(src+5), seed);
+        dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
+        dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
+        dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
+        dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
+        dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
+        dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
     }
 }
 
@@ -3980,6 +5059,7 @@ XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
             xacc[i] = _mm_add_epi64(product, sum);
     }   }
 }
+XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
 
 XXH_FORCE_INLINE XXH_TARGET_SSE2 void
 XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
@@ -4058,14 +5138,28 @@ XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
 
 /*!
  * @internal
- * @brief The bulk processing loop for NEON.
+ * @brief The bulk processing loop for NEON and WASM SIMD128.
  *
  * The NEON code path is actually partially scalar when running on AArch64. This
  * is to optimize the pipelining and can have up to 15% speedup depending on the
  * CPU, and it also mitigates some GCC codegen issues.
  *
  * @see XXH3_NEON_LANES for configuring this and details about this optimization.
+ *
+ * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
+ * integers instead of the other platforms which mask full 64-bit vectors,
+ * so the setup is more complicated than just shifting right.
+ *
+ * Additionally, there is an optimization for 4 lanes at once noted below.
+ *
+ * Since, as stated, the most optimal amount of lanes for Cortexes is 6,
+ * there needs to be *three* versions of the accumulate operation used
+ * for the remaining 2 lanes.
+ *
+ * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap
+ * nearly perfectly.
  */
+
 XXH_FORCE_INLINE void
 XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT input,
@@ -4073,101 +5167,182 @@ XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
     XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
-    {
-        uint64x2_t* const xacc = (uint64x2_t *) acc;
+    {   /* GCC for darwin arm64 does not like aliasing here */
+        xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
         /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
-        uint8_t const* const xinput = (const uint8_t *) input;
-        uint8_t const* const xsecret  = (const uint8_t *) secret;
+        uint8_t const* xinput = (const uint8_t *) input;
+        uint8_t const* xsecret  = (const uint8_t *) secret;
 
         size_t i;
-        /* NEON for the first few lanes (these loops are normally interleaved) */
-        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
+#ifdef __wasm_simd128__
+        /*
+         * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret
+         * is constant propagated, which results in it converting it to this
+         * inside the loop:
+         *
+         *    a = v128.load(XXH3_kSecret +  0 + $secret_offset, offset = 0)
+         *    b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0)
+         *    ...
+         *
+         * This requires a full 32-bit address immediate (and therefore a 6 byte
+         * instruction) as well as an add for each offset.
+         *
+         * Putting an asm guard prevents it from folding (at the cost of losing
+         * the alignment hint), and uses the free offset in `v128.load` instead
+         * of adding secret_offset each time which overall reduces code size by
+         * about a kilobyte and improves performance.
+         */
+        XXH_COMPILER_GUARD(xsecret);
+#endif
+        /* Scalar lanes use the normal scalarRound routine */
+        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+            XXH3_scalarRound(acc, input, secret, i);
+        }
+        i = 0;
+        /* 4 NEON lanes at a time. */
+        for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
             /* data_vec = xinput[i]; */
-            uint8x16_t data_vec    = vld1q_u8(xinput  + (i * 16));
+            uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput  + (i * 16));
+            uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput  + ((i+1) * 16));
             /* key_vec  = xsecret[i];  */
-            uint8x16_t key_vec     = vld1q_u8(xsecret + (i * 16));
-            uint64x2_t data_key;
-            uint32x2_t data_key_lo, data_key_hi;
-            /* xacc[i] += swap(data_vec); */
-            uint64x2_t const data64  = vreinterpretq_u64_u8(data_vec);
-            uint64x2_t const swapped = vextq_u64(data64, data64, 1);
-            xacc[i] = vaddq_u64 (xacc[i], swapped);
+            uint64x2_t key_vec_1  = XXH_vld1q_u64(xsecret + (i * 16));
+            uint64x2_t key_vec_2  = XXH_vld1q_u64(xsecret + ((i+1) * 16));
+            /* data_swap = swap(data_vec) */
+            uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
+            uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
             /* data_key = data_vec ^ key_vec; */
-            data_key = vreinterpretq_u64_u8(veorq_u8(data_vec, key_vec));
-            /* data_key_lo = (uint32x2_t) (data_key & 0xFFFFFFFF);
-             * data_key_hi = (uint32x2_t) (data_key >> 32);
-             * data_key = UNDEFINED; */
-            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
-            /* xacc[i] += (uint64x2_t) data_key_lo * (uint64x2_t) data_key_hi; */
-            xacc[i] = vmlal_u32 (xacc[i], data_key_lo, data_key_hi);
+            uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
+            uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
 
+            /*
+             * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
+             * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
+             * get one vector with the low 32 bits of each lane, and one vector
+             * with the high 32 bits of each lane.
+             *
+             * The intrinsic returns a double vector because the original ARMv7-a
+             * instruction modified both arguments in place. AArch64 and SIMD128 emit
+             * two instructions from this intrinsic.
+             *
+             *  [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
+             *  [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
+             */
+            uint32x4x2_t unzipped = vuzpq_u32(
+                vreinterpretq_u32_u64(data_key_1),
+                vreinterpretq_u32_u64(data_key_2)
+            );
+            /* data_key_lo = data_key & 0xFFFFFFFF */
+            uint32x4_t data_key_lo = unzipped.val[0];
+            /* data_key_hi = data_key >> 32 */
+            uint32x4_t data_key_hi = unzipped.val[1];
+            /*
+             * Then, we can split the vectors horizontally and multiply which, as for most
+             * widening intrinsics, have a variant that works on both high half vectors
+             * for free on AArch64. A similar instruction is available on SIMD128.
+             *
+             * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
+             */
+            uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
+            uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
+            /*
+             * Clang reorders
+             *    a += b * c;     // umlal   swap.2d, dkl.2s, dkh.2s
+             *    c += a;         // add     acc.2d, acc.2d, swap.2d
+             * to
+             *    c += a;         // add     acc.2d, acc.2d, swap.2d
+             *    c += b * c;     // umlal   acc.2d, dkl.2s, dkh.2s
+             *
+             * While it would make sense in theory since the addition is faster,
+             * for reasons likely related to umlal being limited to certain NEON
+             * pipelines, this is worse. A compiler guard fixes this.
+             */
+            XXH_COMPILER_GUARD_CLANG_NEON(sum_1);
+            XXH_COMPILER_GUARD_CLANG_NEON(sum_2);
+            /* xacc[i] = acc_vec + sum; */
+            xacc[i]   = vaddq_u64(xacc[i], sum_1);
+            xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
         }
-        /* Scalar for the remainder. This may be a zero iteration loop. */
-        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
-            XXH3_scalarRound(acc, input, secret, i);
+        /* Operate on the remaining NEON lanes 2 at a time. */
+        for (; i < XXH3_NEON_LANES / 2; i++) {
+            /* data_vec = xinput[i]; */
+            uint64x2_t data_vec = XXH_vld1q_u64(xinput  + (i * 16));
+            /* key_vec  = xsecret[i];  */
+            uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
+            /* acc_vec_2 = swap(data_vec) */
+            uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
+            /* data_key = data_vec ^ key_vec; */
+            uint64x2_t data_key = veorq_u64(data_vec, key_vec);
+            /* For two lanes, just use VMOVN and VSHRN. */
+            /* data_key_lo = data_key & 0xFFFFFFFF; */
+            uint32x2_t data_key_lo = vmovn_u64(data_key);
+            /* data_key_hi = data_key >> 32; */
+            uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
+            /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
+            uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
+            /* Same Clang workaround as before */
+            XXH_COMPILER_GUARD_CLANG_NEON(sum);
+            /* xacc[i] = acc_vec + sum; */
+            xacc[i] = vaddq_u64 (xacc[i], sum);
         }
     }
 }
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
 
 XXH_FORCE_INLINE void
 XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
 
-    {   uint64x2_t* xacc       = (uint64x2_t*) acc;
+    {   xxh_aliasing_uint64x2_t* xacc       = (xxh_aliasing_uint64x2_t*) acc;
         uint8_t const* xsecret = (uint8_t const*) secret;
-        uint32x2_t prime       = vdup_n_u32 (XXH_PRIME32_1);
 
         size_t i;
-        /* NEON for the first few lanes (these loops are normally interleaved) */
+        /* WASM uses operator overloads and doesn't need these. */
+#ifndef __wasm_simd128__
+        /* { prime32_1, prime32_1 } */
+        uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1);
+        /* { 0, prime32_1, 0, prime32_1 } */
+        uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32));
+#endif
+
+        /* AArch64 uses both scalar and neon at the same time */
+        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
+            XXH3_scalarScrambleRound(acc, secret, i);
+        }
         for (i=0; i < XXH3_NEON_LANES / 2; i++) {
             /* xacc[i] ^= (xacc[i] >> 47); */
             uint64x2_t acc_vec  = xacc[i];
-            uint64x2_t shifted  = vshrq_n_u64 (acc_vec, 47);
-            uint64x2_t data_vec = veorq_u64   (acc_vec, shifted);
+            uint64x2_t shifted  = vshrq_n_u64(acc_vec, 47);
+            uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
 
             /* xacc[i] ^= xsecret[i]; */
-            uint8x16_t key_vec  = vld1q_u8    (xsecret + (i * 16));
-            uint64x2_t data_key = veorq_u64   (data_vec, vreinterpretq_u64_u8(key_vec));
-
+            uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
+            uint64x2_t data_key = veorq_u64(data_vec, key_vec);
             /* xacc[i] *= XXH_PRIME32_1 */
-            uint32x2_t data_key_lo, data_key_hi;
-            /* data_key_lo = (uint32x2_t) (xacc[i] & 0xFFFFFFFF);
-             * data_key_hi = (uint32x2_t) (xacc[i] >> 32);
-             * xacc[i] = UNDEFINED; */
-            XXH_SPLIT_IN_PLACE(data_key, data_key_lo, data_key_hi);
-            {   /*
-                 * prod_hi = (data_key >> 32) * XXH_PRIME32_1;
-                 *
-                 * Avoid vmul_u32 + vshll_n_u32 since Clang 6 and 7 will
-                 * incorrectly "optimize" this:
-                 *   tmp     = vmul_u32(vmovn_u64(a), vmovn_u64(b));
-                 *   shifted = vshll_n_u32(tmp, 32);
-                 * to this:
-                 *   tmp     = "vmulq_u64"(a, b); // no such thing!
-                 *   shifted = vshlq_n_u64(tmp, 32);
-                 *
-                 * However, unlike SSE, Clang lacks a 64-bit multiply routine
-                 * for NEON, and it scalarizes two 64-bit multiplies instead.
-                 *
-                 * vmull_u32 has the same timing as vmul_u32, and it avoids
-                 * this bug completely.
-                 * See https://bugs.llvm.org/show_bug.cgi?id=39967
-                 */
-                uint64x2_t prod_hi = vmull_u32 (data_key_hi, prime);
-                /* xacc[i] = prod_hi << 32; */
-                xacc[i] = vshlq_n_u64(prod_hi, 32);
-                /* xacc[i] += (prod_hi & 0xFFFFFFFF) * XXH_PRIME32_1; */
-                xacc[i] = vmlal_u32(xacc[i], data_key_lo, prime);
-            }
-        }
-        /* Scalar for the remainder. This may be a zero iteration loop. */
-        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
-            XXH3_scalarScrambleRound(acc, secret, i);
+#ifdef __wasm_simd128__
+            /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */
+            xacc[i] = data_key * XXH_PRIME32_1;
+#else
+            /*
+             * Expanded version with portable NEON intrinsics
+             *
+             *    lo(x) * lo(y) + (hi(x) * lo(y) << 32)
+             *
+             * prod_hi = hi(data_key) * lo(prime) << 32
+             *
+             * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector
+             * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits
+             * and avoid the shift.
+             */
+            uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi);
+            /* Extract low bits for vmlal_u32  */
+            uint32x2_t data_key_lo = vmovn_u64(data_key);
+            /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */
+            xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);
+#endif
         }
     }
 }
-
 #endif
 
 #if (XXH_VECTOR == XXH_VSX)
@@ -4178,23 +5353,23 @@ XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
                     const void* XXH_RESTRICT secret)
 {
     /* presumed aligned */
-    unsigned int* const xacc = (unsigned int*) acc;
-    xxh_u64x2 const* const xinput   = (xxh_u64x2 const*) input;   /* no alignment restriction */
-    xxh_u64x2 const* const xsecret  = (xxh_u64x2 const*) secret;    /* no alignment restriction */
+    xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
+    xxh_u8 const* const xinput   = (xxh_u8 const*) input;   /* no alignment restriction */
+    xxh_u8 const* const xsecret  = (xxh_u8 const*) secret;    /* no alignment restriction */
     xxh_u64x2 const v32 = { 32, 32 };
     size_t i;
     for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
         /* data_vec = xinput[i]; */
-        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + i);
+        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
         /* key_vec = xsecret[i]; */
-        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
+        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
         xxh_u64x2 const data_key = data_vec ^ key_vec;
         /* shuffled = (data_key << 32) | (data_key >> 32); */
         xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
         /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
         xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
         /* acc_vec = xacc[i]; */
-        xxh_u64x2 acc_vec        = (xxh_u64x2)vec_xl(0, xacc + 4 * i);
+        xxh_u64x2 acc_vec        = xacc[i];
         acc_vec += product;
 
         /* swap high and low halves */
@@ -4203,18 +5378,18 @@ XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
 #else
         acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
 #endif
-        /* xacc[i] = acc_vec; */
-        vec_xst((xxh_u32x4)acc_vec, 0, xacc + 4 * i);
+        xacc[i] = acc_vec;
     }
 }
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
 
 XXH_FORCE_INLINE void
 XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 {
     XXH_ASSERT((((size_t)acc) & 15) == 0);
 
-    {         xxh_u64x2* const xacc    =       (xxh_u64x2*) acc;
-        const xxh_u64x2* const xsecret = (const xxh_u64x2*) secret;
+    {   xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
+        const xxh_u8* const xsecret = (const xxh_u8*) secret;
         /* constants */
         xxh_u64x2 const v32  = { 32, 32 };
         xxh_u64x2 const v47 = { 47, 47 };
@@ -4226,7 +5401,7 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
             xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
 
             /* xacc[i] ^= xsecret[i]; */
-            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + i);
+            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
             xxh_u64x2 const data_key = data_vec ^ key_vec;
 
             /* xacc[i] *= XXH_PRIME32_1 */
@@ -4240,8 +5415,148 @@ XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
 
 #endif
 
+#if (XXH_VECTOR == XXH_SVE)
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
+                   const void* XXH_RESTRICT input,
+                   const void* XXH_RESTRICT secret)
+{
+    uint64_t *xacc = (uint64_t *)acc;
+    const uint64_t *xinput = (const uint64_t *)(const void *)input;
+    const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
+    svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
+    uint64_t element_count = svcntd();
+    if (element_count >= 8) {
+        svbool_t mask = svptrue_pat_b64(SV_VL8);
+        svuint64_t vacc = svld1_u64(mask, xacc);
+        ACCRND(vacc, 0);
+        svst1_u64(mask, xacc, vacc);
+    } else if (element_count == 2) {   /* sve128 */
+        svbool_t mask = svptrue_pat_b64(SV_VL2);
+        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+        svuint64_t acc1 = svld1_u64(mask, xacc + 2);
+        svuint64_t acc2 = svld1_u64(mask, xacc + 4);
+        svuint64_t acc3 = svld1_u64(mask, xacc + 6);
+        ACCRND(acc0, 0);
+        ACCRND(acc1, 2);
+        ACCRND(acc2, 4);
+        ACCRND(acc3, 6);
+        svst1_u64(mask, xacc + 0, acc0);
+        svst1_u64(mask, xacc + 2, acc1);
+        svst1_u64(mask, xacc + 4, acc2);
+        svst1_u64(mask, xacc + 6, acc3);
+    } else {
+        svbool_t mask = svptrue_pat_b64(SV_VL4);
+        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+        svuint64_t acc1 = svld1_u64(mask, xacc + 4);
+        ACCRND(acc0, 0);
+        ACCRND(acc1, 4);
+        svst1_u64(mask, xacc + 0, acc0);
+        svst1_u64(mask, xacc + 4, acc1);
+    }
+}
+
+XXH_FORCE_INLINE void
+XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
+               const xxh_u8* XXH_RESTRICT input,
+               const xxh_u8* XXH_RESTRICT secret,
+               size_t nbStripes)
+{
+    if (nbStripes != 0) {
+        uint64_t *xacc = (uint64_t *)acc;
+        const uint64_t *xinput = (const uint64_t *)(const void *)input;
+        const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
+        svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
+        uint64_t element_count = svcntd();
+        if (element_count >= 8) {
+            svbool_t mask = svptrue_pat_b64(SV_VL8);
+            svuint64_t vacc = svld1_u64(mask, xacc + 0);
+            do {
+                /* svprfd(svbool_t, void *, enum svfprop); */
+                svprfd(mask, xinput + 128, SV_PLDL1STRM);
+                ACCRND(vacc, 0);
+                xinput += 8;
+                xsecret += 1;
+                nbStripes--;
+           } while (nbStripes != 0);
+
+           svst1_u64(mask, xacc + 0, vacc);
+        } else if (element_count == 2) { /* sve128 */
+            svbool_t mask = svptrue_pat_b64(SV_VL2);
+            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+            svuint64_t acc1 = svld1_u64(mask, xacc + 2);
+            svuint64_t acc2 = svld1_u64(mask, xacc + 4);
+            svuint64_t acc3 = svld1_u64(mask, xacc + 6);
+            do {
+                svprfd(mask, xinput + 128, SV_PLDL1STRM);
+                ACCRND(acc0, 0);
+                ACCRND(acc1, 2);
+                ACCRND(acc2, 4);
+                ACCRND(acc3, 6);
+                xinput += 8;
+                xsecret += 1;
+                nbStripes--;
+           } while (nbStripes != 0);
+
+           svst1_u64(mask, xacc + 0, acc0);
+           svst1_u64(mask, xacc + 2, acc1);
+           svst1_u64(mask, xacc + 4, acc2);
+           svst1_u64(mask, xacc + 6, acc3);
+        } else {
+            svbool_t mask = svptrue_pat_b64(SV_VL4);
+            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
+            svuint64_t acc1 = svld1_u64(mask, xacc + 4);
+            do {
+                svprfd(mask, xinput + 128, SV_PLDL1STRM);
+                ACCRND(acc0, 0);
+                ACCRND(acc1, 4);
+                xinput += 8;
+                xsecret += 1;
+                nbStripes--;
+           } while (nbStripes != 0);
+
+           svst1_u64(mask, xacc + 0, acc0);
+           svst1_u64(mask, xacc + 4, acc1);
+       }
+    }
+}
+
+#endif
+
 /* scalar variants - universal */
 
+#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
+/*
+ * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
+ * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
+ *
+ * While this might not seem like much, as AArch64 is a 64-bit architecture, only
+ * big Cortex designs have a full 64-bit multiplier.
+ *
+ * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
+ * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
+ * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
+ *
+ * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
+ * not have this penalty and does the mask automatically.
+ */
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
+{
+    xxh_u64 ret;
+    /* note: %x = 64-bit register, %w = 32-bit register */
+    __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
+    return ret;
+}
+#else
+XXH_FORCE_INLINE xxh_u64
+XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
+{
+    return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
+}
+#endif
+
 /*!
  * @internal
  * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
@@ -4264,7 +5579,7 @@ XXH3_scalarRound(void* XXH_RESTRICT acc,
         xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
         xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
         xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
-        xacc[lane] += XXH_mult32to64(data_key & 0xFFFFFFFF, data_key >> 32);
+        xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
     }
 }
 
@@ -4278,10 +5593,18 @@ XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
                      const void* XXH_RESTRICT secret)
 {
     size_t i;
+    /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
+#if defined(__GNUC__) && !defined(__clang__) \
+  && (defined(__arm__) || defined(__thumb2__)) \
+  && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
+  && XXH_SIZE_OPT <= 0
+#  pragma GCC unroll 8
+#endif
     for (i=0; i < XXH_ACC_NB; i++) {
         XXH3_scalarRound(acc, input, secret, i);
     }
 }
+XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
 
 /*!
  * @internal
@@ -4333,10 +5656,10 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
     const xxh_u8* kSecretPtr = XXH3_kSecret;
     XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
 
-#if defined(__clang__) && defined(__aarch64__)
+#if defined(__GNUC__) && defined(__aarch64__)
     /*
      * UGLY HACK:
-     * Clang generates a bunch of MOV/MOVK pairs for aarch64, and they are
+     * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
      * placed sequentially, in order, at the top of the unrolled loop.
      *
      * While MOVK is great for generating constants (2 cycles for a 64-bit
@@ -4351,7 +5674,7 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
      * ADD
      * SUB      STR
      *          STR
-     * By forcing loads from memory (as the asm line causes Clang to assume
+     * By forcing loads from memory (as the asm line causes the compiler to assume
      * that XXH3_kSecretPtr has been changed), the pipelines are used more
      * efficiently:
      *   I   L   S
@@ -4368,17 +5691,11 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
      */
     XXH_COMPILER_GUARD(kSecretPtr);
 #endif
-    /*
-     * Note: in debug mode, this overrides the asm optimization
-     * and Clang will emit MOVK chains again.
-     */
-    XXH_ASSERT(kSecretPtr == XXH3_kSecret);
-
     {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
         int i;
         for (i=0; i < nbRounds; i++) {
             /*
-             * The asm hack causes Clang to assume that kSecretPtr aliases with
+             * The asm hack causes the compiler to assume that kSecretPtr aliases with
              * customSecret, and on aarch64, this prevented LDP from merging two
              * loads together for free. Putting the loads together before the stores
              * properly generates LDP.
@@ -4391,7 +5708,7 @@ XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
 }
 
 
-typedef void (*XXH3_f_accumulate_512)(void* XXH_RESTRICT, const void*, const void*);
+typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
 typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
 typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
 
@@ -4399,82 +5716,63 @@ typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
 #if (XXH_VECTOR == XXH_AVX512)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_avx512
+#define XXH3_accumulate     XXH3_accumulate_avx512
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_avx512
 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
 
 #elif (XXH_VECTOR == XXH_AVX2)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_avx2
+#define XXH3_accumulate     XXH3_accumulate_avx2
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_avx2
 #define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
 
 #elif (XXH_VECTOR == XXH_SSE2)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_sse2
+#define XXH3_accumulate     XXH3_accumulate_sse2
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_sse2
 #define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
 
 #elif (XXH_VECTOR == XXH_NEON)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_neon
+#define XXH3_accumulate     XXH3_accumulate_neon
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_neon
 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
 
 #elif (XXH_VECTOR == XXH_VSX)
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_vsx
+#define XXH3_accumulate     XXH3_accumulate_vsx
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_vsx
 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
 
+#elif (XXH_VECTOR == XXH_SVE)
+#define XXH3_accumulate_512 XXH3_accumulate_512_sve
+#define XXH3_accumulate     XXH3_accumulate_sve
+#define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
+#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+
 #else /* scalar */
 
 #define XXH3_accumulate_512 XXH3_accumulate_512_scalar
+#define XXH3_accumulate     XXH3_accumulate_scalar
 #define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
 #define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
 
 #endif
 
-
-
-#ifndef XXH_PREFETCH_DIST
-#  ifdef __clang__
-#    define XXH_PREFETCH_DIST 320
-#  else
-#    if (XXH_VECTOR == XXH_AVX512)
-#      define XXH_PREFETCH_DIST 512
-#    else
-#      define XXH_PREFETCH_DIST 384
-#    endif
-#  endif  /* __clang__ */
-#endif  /* XXH_PREFETCH_DIST */
-
-/*
- * XXH3_accumulate()
- * Loops over XXH3_accumulate_512().
- * Assumption: nbStripes will not overflow the secret size
- */
-XXH_FORCE_INLINE void
-XXH3_accumulate(     xxh_u64* XXH_RESTRICT acc,
-                const xxh_u8* XXH_RESTRICT input,
-                const xxh_u8* XXH_RESTRICT secret,
-                      size_t nbStripes,
-                      XXH3_f_accumulate_512 f_acc512)
-{
-    size_t n;
-    for (n = 0; n < nbStripes; n++ ) {
-        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;
-        XXH_PREFETCH(in + XXH_PREFETCH_DIST);
-        f_acc512(acc,
-                 in,
-                 secret + n*XXH_SECRET_CONSUME_RATE);
-    }
-}
+#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
+#  undef XXH3_initCustomSecret
+#  define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
+#endif
 
 XXH_FORCE_INLINE void
 XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
                       const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_f_accumulate_512 f_acc512,
+                            XXH3_f_accumulate f_acc,
                             XXH3_f_scrambleAcc f_scramble)
 {
     size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
@@ -4486,7 +5784,7 @@ XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
     XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
 
     for (n = 0; n < nb_blocks; n++) {
-        XXH3_accumulate(acc, input + n*block_len, secret, nbStripesPerBlock, f_acc512);
+        f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
         f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
     }
 
@@ -4494,12 +5792,12 @@ XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
     XXH_ASSERT(len > XXH_STRIPE_LEN);
     {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
         XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
-        XXH3_accumulate(acc, input + nb_blocks*block_len, secret, nbStripes, f_acc512);
+        f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
 
         /* last stripe */
         {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
 #define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */
-            f_acc512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
+            XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
     }   }
 }
 
@@ -4544,12 +5842,12 @@ XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secre
 XXH_FORCE_INLINE XXH64_hash_t
 XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
                            const void* XXH_RESTRICT secret, size_t secretSize,
-                           XXH3_f_accumulate_512 f_acc512,
+                           XXH3_f_accumulate f_acc,
                            XXH3_f_scrambleAcc f_scramble)
 {
     XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
 
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc512, f_scramble);
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
 
     /* converge into final hash */
     XXH_STATIC_ASSERT(sizeof(acc) == 64);
@@ -4563,13 +5861,15 @@ XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
  * It's important for performance to transmit secret's size (when it's static)
  * so that the compiler can properly optimize the vectorized loop.
  * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
+ * breaks -Og, this is XXH_NO_INLINE.
  */
-XXH_FORCE_INLINE XXH64_hash_t
+XXH3_WITH_SECRET_INLINE XXH64_hash_t
 XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
                              XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64;
-    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate_512, XXH3_scrambleAcc);
+    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 /*
@@ -4578,12 +5878,12 @@ XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
  * Note that inside this no_inline function, we do inline the internal loop,
  * and provide a statically defined secret size to allow optimization of vector loop.
  */
-XXH_NO_INLINE XXH64_hash_t
+XXH_NO_INLINE XXH_PUREF XXH64_hash_t
 XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
                           XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64; (void)secret; (void)secretLen;
-    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate_512, XXH3_scrambleAcc);
+    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 /*
@@ -4600,18 +5900,20 @@ XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
 XXH_FORCE_INLINE XXH64_hash_t
 XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
                                     XXH64_hash_t seed,
-                                    XXH3_f_accumulate_512 f_acc512,
+                                    XXH3_f_accumulate f_acc,
                                     XXH3_f_scrambleAcc f_scramble,
                                     XXH3_f_initCustomSecret f_initSec)
 {
+#if XXH_SIZE_OPT <= 0
     if (seed == 0)
         return XXH3_hashLong_64b_internal(input, len,
                                           XXH3_kSecret, sizeof(XXH3_kSecret),
-                                          f_acc512, f_scramble);
+                                          f_acc, f_scramble);
+#endif
     {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
         f_initSec(secret, seed);
         return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
-                                          f_acc512, f_scramble);
+                                          f_acc, f_scramble);
     }
 }
 
@@ -4619,12 +5921,12 @@ XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
  * It's important for performance that XXH3_hashLong is not inlined.
  */
 XXH_NO_INLINE XXH64_hash_t
-XXH3_hashLong_64b_withSeed(const void* input, size_t len,
-                           XXH64_hash_t seed, const xxh_u8* secret, size_t secretLen)
+XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
+                           XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)secret; (void)secretLen;
     return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
-                XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
+                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
 }
 
 
@@ -4656,37 +5958,37 @@ XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
 
 /* ===   Public entry point   === */
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(const void* input, size_t len)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
 {
-    return XXH3_64bits_internal(input, len, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
+    return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
-    return XXH3_64bits_internal(input, len, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
+    return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
 {
-    return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
+    return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
 }
 
 XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
 {
-    if (len <= XXH3_MIDSIZE_MAX)
-        return XXH3_64bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
-    return XXH3_hashLong_64b_withSecret(input, len, seed, (const xxh_u8*)secret, secretSize);
+    if (length <= XXH3_MIDSIZE_MAX)
+        return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
+    return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
 }
 
 
 /* ===   XXH3 streaming   === */
-
+#ifndef XXH_NO_STREAM
 /*
  * Malloc's a pointer that is always aligned to align.
  *
@@ -4710,7 +6012,7 @@ XXH3_64bits_withSecretandSeed(const void* input, size_t len, const void* secret,
  *
  * Align must be a power of 2 and 8 <= align <= 128.
  */
-static void* XXH_alignedMalloc(size_t s, size_t align)
+static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
 {
     XXH_ASSERT(align <= 128 && align >= 8); /* range check */
     XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
@@ -4752,7 +6054,15 @@ static void XXH_alignedFree(void* p)
         XXH_free(base);
     }
 }
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Allocate an @ref XXH3_state_t.
+ *
+ * @return An allocated pointer of @ref XXH3_state_t on success.
+ * @return `NULL` on failure.
+ *
+ * @note Must be freed with XXH3_freeState().
+ */
 XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
 {
     XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
@@ -4761,16 +6071,25 @@ XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
     return state;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
+/*!
+ * @brief Frees an @ref XXH3_state_t.
+ *
+ * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
+ *
+ * @return @ref XXH_OK.
+ *
+ * @note Must be allocated with XXH3_createState().
+ */
 XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
 {
     XXH_alignedFree(statePtr);
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API void
-XXH3_copyState(XXH3_state_t* dst_state, const XXH3_state_t* src_state)
+XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
 {
     XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
 }
@@ -4802,18 +6121,18 @@ XXH3_reset_internal(XXH3_state_t* statePtr,
     statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset(XXH3_state_t* statePtr)
+XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
 {
     if (statePtr == NULL) return XXH_ERROR;
     XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
     if (statePtr == NULL) return XXH_ERROR;
     XXH3_reset_internal(statePtr, 0, secret, secretSize);
@@ -4822,9 +6141,9 @@ XXH3_64bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
 {
     if (statePtr == NULL) return XXH_ERROR;
     if (seed==0) return XXH3_64bits_reset(statePtr);
@@ -4834,9 +6153,9 @@ XXH3_64bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed64)
+XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
 {
     if (statePtr == NULL) return XXH_ERROR;
     if (secret == NULL) return XXH_ERROR;
@@ -4846,35 +6165,61 @@ XXH3_64bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret,
     return XXH_OK;
 }
 
-/* Note : when XXH3_consumeStripes() is invoked,
- * there must be a guarantee that at least one more byte must be consumed from input
- * so that the function can blindly consume all stripes using the "normal" secret segment */
-XXH_FORCE_INLINE void
+/*!
+ * @internal
+ * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
+ *
+ * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
+ *
+ * @param acc                Pointer to the 8 accumulator lanes
+ * @param nbStripesSoFarPtr  In/out pointer to the number of leftover stripes in the block*
+ * @param nbStripesPerBlock  Number of stripes in a block
+ * @param input              Input pointer
+ * @param nbStripes          Number of stripes to process
+ * @param secret             Secret pointer
+ * @param secretLimit        Offset of the last block in @p secret
+ * @param f_acc              Pointer to an XXH3_accumulate implementation
+ * @param f_scramble         Pointer to an XXH3_scrambleAcc implementation
+ * @return                   Pointer past the end of @p input after processing
+ */
+XXH_FORCE_INLINE const xxh_u8 *
 XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
                     size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
                     const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
                     const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
-                    XXH3_f_accumulate_512 f_acc512,
+                    XXH3_f_accumulate f_acc,
                     XXH3_f_scrambleAcc f_scramble)
 {
-    XXH_ASSERT(nbStripes <= nbStripesPerBlock);  /* can handle max 1 scramble per invocation */
-    XXH_ASSERT(*nbStripesSoFarPtr < nbStripesPerBlock);
-    if (nbStripesPerBlock - *nbStripesSoFarPtr <= nbStripes) {
-        /* need a scrambling operation */
-        size_t const nbStripesToEndofBlock = nbStripesPerBlock - *nbStripesSoFarPtr;
-        size_t const nbStripesAfterBlock = nbStripes - nbStripesToEndofBlock;
-        XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripesToEndofBlock, f_acc512);
-        f_scramble(acc, secret + secretLimit);
-        XXH3_accumulate(acc, input + nbStripesToEndofBlock * XXH_STRIPE_LEN, secret, nbStripesAfterBlock, f_acc512);
-        *nbStripesSoFarPtr = nbStripesAfterBlock;
-    } else {
-        XXH3_accumulate(acc, input, secret + nbStripesSoFarPtr[0] * XXH_SECRET_CONSUME_RATE, nbStripes, f_acc512);
+    const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
+    /* Process full blocks */
+    if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
+        /* Process the initial partial block... */
+        size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
+
+        do {
+            /* Accumulate and scramble */
+            f_acc(acc, input, initialSecret, nbStripesThisIter);
+            f_scramble(acc, secret + secretLimit);
+            input += nbStripesThisIter * XXH_STRIPE_LEN;
+            nbStripes -= nbStripesThisIter;
+            /* Then continue the loop with the full block size */
+            nbStripesThisIter = nbStripesPerBlock;
+            initialSecret = secret;
+        } while (nbStripes >= nbStripesPerBlock);
+        *nbStripesSoFarPtr = 0;
+    }
+    /* Process a partial block */
+    if (nbStripes > 0) {
+        f_acc(acc, input, initialSecret, nbStripes);
+        input += nbStripes * XXH_STRIPE_LEN;
         *nbStripesSoFarPtr += nbStripes;
     }
+    /* Return end pointer */
+    return input;
 }
 
 #ifndef XXH3_STREAM_USE_STACK
-# ifndef __clang__ /* clang doesn't need additional stack space */
+# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
 #   define XXH3_STREAM_USE_STACK 1
 # endif
 #endif
@@ -4884,7 +6229,7 @@ XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
 XXH_FORCE_INLINE XXH_errorcode
 XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
             const xxh_u8* XXH_RESTRICT input, size_t len,
-            XXH3_f_accumulate_512 f_acc512,
+            XXH3_f_accumulate f_acc,
             XXH3_f_scrambleAcc f_scramble)
 {
     if (input==NULL) {
@@ -4900,7 +6245,8 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
          * when operating accumulators directly into state.
          * Operating into stack space seems to enable proper optimization.
          * clang, on the other hand, doesn't seem to need this trick */
-        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8]; memcpy(acc, state->acc, sizeof(acc));
+        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
+        XXH_memcpy(acc, state->acc, sizeof(acc));
 #else
         xxh_u64* XXH_RESTRICT const acc = state->acc;
 #endif
@@ -4908,7 +6254,7 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
         XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
 
         /* small input : just fill in tmp buffer */
-        if (state->bufferedSize + len <= XXH3_INTERNALBUFFER_SIZE) {
+        if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
             XXH_memcpy(state->buffer + state->bufferedSize, input, len);
             state->bufferedSize += (XXH32_hash_t)len;
             return XXH_OK;
@@ -4930,57 +6276,20 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
                                &state->nbStripesSoFar, state->nbStripesPerBlock,
                                 state->buffer, XXH3_INTERNALBUFFER_STRIPES,
                                 secret, state->secretLimit,
-                                f_acc512, f_scramble);
+                                f_acc, f_scramble);
             state->bufferedSize = 0;
         }
         XXH_ASSERT(input < bEnd);
-
-        /* large input to consume : ingest per full block */
-        if ((size_t)(bEnd - input) > state->nbStripesPerBlock * XXH_STRIPE_LEN) {
+        if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
             size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
-            XXH_ASSERT(state->nbStripesPerBlock >= state->nbStripesSoFar);
-            /* join to current block's end */
-            {   size_t const nbStripesToEnd = state->nbStripesPerBlock - state->nbStripesSoFar;
-                XXH_ASSERT(nbStripesToEnd <= nbStripes);
-                XXH3_accumulate(acc, input, secret + state->nbStripesSoFar * XXH_SECRET_CONSUME_RATE, nbStripesToEnd, f_acc512);
-                f_scramble(acc, secret + state->secretLimit);
-                state->nbStripesSoFar = 0;
-                input += nbStripesToEnd * XXH_STRIPE_LEN;
-                nbStripes -= nbStripesToEnd;
-            }
-            /* consume per entire blocks */
-            while(nbStripes >= state->nbStripesPerBlock) {
-                XXH3_accumulate(acc, input, secret, state->nbStripesPerBlock, f_acc512);
-                f_scramble(acc, secret + state->secretLimit);
-                input += state->nbStripesPerBlock * XXH_STRIPE_LEN;
-                nbStripes -= state->nbStripesPerBlock;
-            }
-            /* consume last partial block */
-            XXH3_accumulate(acc, input, secret, nbStripes, f_acc512);
-            input += nbStripes * XXH_STRIPE_LEN;
-            XXH_ASSERT(input < bEnd);  /* at least some bytes left */
-            state->nbStripesSoFar = nbStripes;
-            /* buffer predecessor of last partial stripe */
-            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
-            XXH_ASSERT(bEnd - input <= XXH_STRIPE_LEN);
-        } else {
-            /* content to consume <= block size */
-            /* Consume input by a multiple of internal buffer size */
-            if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
-                const xxh_u8* const limit = bEnd - XXH3_INTERNALBUFFER_SIZE;
-                do {
-                    XXH3_consumeStripes(acc,
+            input = XXH3_consumeStripes(acc,
                                        &state->nbStripesSoFar, state->nbStripesPerBlock,
-                                        input, XXH3_INTERNALBUFFER_STRIPES,
-                                        secret, state->secretLimit,
-                                        f_acc512, f_scramble);
-                    input += XXH3_INTERNALBUFFER_SIZE;
-                } while (input<limit);
-                /* buffer predecessor of last partial stripe */
-                XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
-            }
-        }
+                                       input, nbStripes,
+                                       secret, state->secretLimit,
+                                       f_acc, f_scramble);
+            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
 
+        }
         /* Some remaining input (always) : buffer it */
         XXH_ASSERT(input < bEnd);
         XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
@@ -4989,19 +6298,19 @@ XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
         state->bufferedSize = (XXH32_hash_t)(bEnd-input);
 #if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
         /* save stack accumulators into state */
-        memcpy(state->acc, acc, sizeof(acc));
+        XXH_memcpy(state->acc, acc, sizeof(acc));
 #endif
     }
 
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_update(XXH3_state_t* state, const void* input, size_t len)
+XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
 {
     return XXH3_update(state, (const xxh_u8*)input, len,
-                       XXH3_accumulate_512, XXH3_scrambleAcc);
+                       XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 
@@ -5010,37 +6319,40 @@ XXH3_digest_long (XXH64_hash_t* acc,
                   const XXH3_state_t* state,
                   const unsigned char* secret)
 {
+    xxh_u8 lastStripe[XXH_STRIPE_LEN];
+    const xxh_u8* lastStripePtr;
+
     /*
      * Digest on a local copy. This way, the state remains unaltered, and it can
      * continue ingesting more input afterwards.
      */
     XXH_memcpy(acc, state->acc, sizeof(state->acc));
     if (state->bufferedSize >= XXH_STRIPE_LEN) {
+        /* Consume remaining stripes then point to remaining data in buffer */
         size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
         size_t nbStripesSoFar = state->nbStripesSoFar;
         XXH3_consumeStripes(acc,
                            &nbStripesSoFar, state->nbStripesPerBlock,
                             state->buffer, nbStripes,
                             secret, state->secretLimit,
-                            XXH3_accumulate_512, XXH3_scrambleAcc);
-        /* last stripe */
-        XXH3_accumulate_512(acc,
-                            state->buffer + state->bufferedSize - XXH_STRIPE_LEN,
-                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+                            XXH3_accumulate, XXH3_scrambleAcc);
+        lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
     } else {  /* bufferedSize < XXH_STRIPE_LEN */
-        xxh_u8 lastStripe[XXH_STRIPE_LEN];
+        /* Copy to temp buffer */
         size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
         XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
         XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
         XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
-        XXH3_accumulate_512(acc,
-                            lastStripe,
-                            secret + state->secretLimit - XXH_SECRET_LASTACC_START);
+        lastStripePtr = lastStripe;
     }
+    /* Last stripe */
+    XXH3_accumulate_512(acc,
+                        lastStripePtr,
+                        secret + state->secretLimit - XXH_SECRET_LASTACC_START);
 }
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
 {
     const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
     if (state->totalLen > XXH3_MIDSIZE_MAX) {
@@ -5056,7 +6368,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
     return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                   secret, state->secretLimit + XXH_STRIPE_LEN);
 }
-
+#endif /* !XXH_NO_STREAM */
 
 
 /* ==========================================
@@ -5076,7 +6388,7 @@ XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (const XXH3_state_t* state)
  * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
  */
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     /* A doubled version of 1to3_64b with different constants. */
@@ -5105,7 +6417,7 @@ XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
     }
 }
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -5125,14 +6437,14 @@ XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_
         m128.low64  ^= (m128.high64 >> 3);
 
         m128.low64   = XXH_xorshift64(m128.low64, 35);
-        m128.low64  *= 0x9FB21C651E98DF25ULL;
+        m128.low64  *= PRIME_MX2;
         m128.low64   = XXH_xorshift64(m128.low64, 28);
         m128.high64  = XXH3_avalanche(m128.high64);
         return m128;
     }
 }
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(input != NULL);
@@ -5207,7 +6519,7 @@ XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64
 /*
  * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
  */
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
 {
     XXH_ASSERT(len <= 16);
@@ -5238,7 +6550,7 @@ XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
 }
 
 
-XXH_FORCE_INLINE XXH128_hash_t
+XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                       XXH64_hash_t seed)
@@ -5249,6 +6561,16 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
     {   XXH128_hash_t acc;
         acc.low64 = len * XXH_PRIME64_1;
         acc.high64 = 0;
+
+#if XXH_SIZE_OPT >= 1
+        {
+            /* Smaller, but slightly slower. */
+            unsigned int i = (unsigned int)(len - 1) / 32;
+            do {
+                acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
+            } while (i-- != 0);
+        }
+#else
         if (len > 32) {
             if (len > 64) {
                 if (len > 96) {
@@ -5259,6 +6581,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
             acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
         }
         acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
+#endif
         {   XXH128_hash_t h128;
             h128.low64  = acc.low64 + acc.high64;
             h128.high64 = (acc.low64    * XXH_PRIME64_1)
@@ -5271,7 +6594,7 @@ XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
     }
 }
 
-XXH_NO_INLINE XXH128_hash_t
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t
 XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                        const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
                        XXH64_hash_t seed)
@@ -5280,25 +6603,34 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
     XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
 
     {   XXH128_hash_t acc;
-        int const nbRounds = (int)len / 32;
-        int i;
+        unsigned i;
         acc.low64 = len * XXH_PRIME64_1;
         acc.high64 = 0;
-        for (i=0; i<4; i++) {
+        /*
+         *  We set as `i` as offset + 32. We do this so that unchanged
+         * `len` can be used as upper bound. This reaches a sweet spot
+         * where both x86 and aarch64 get simple agen and good codegen
+         * for the loop.
+         */
+        for (i = 32; i < 160; i += 32) {
             acc = XXH128_mix32B(acc,
-                                input  + (32 * i),
-                                input  + (32 * i) + 16,
-                                secret + (32 * i),
+                                input  + i - 32,
+                                input  + i - 16,
+                                secret + i - 32,
                                 seed);
         }
         acc.low64 = XXH3_avalanche(acc.low64);
         acc.high64 = XXH3_avalanche(acc.high64);
-        XXH_ASSERT(nbRounds >= 4);
-        for (i=4 ; i < nbRounds; i++) {
+        /*
+         * NB: `i <= len` will duplicate the last 32-bytes if
+         * len % 32 was zero. This is an unfortunate necessity to keep
+         * the hash result stable.
+         */
+        for (i=160; i <= len; i += 32) {
             acc = XXH128_mix32B(acc,
-                                input + (32 * i),
-                                input + (32 * i) + 16,
-                                secret + XXH3_MIDSIZE_STARTOFFSET + (32 * (i - 4)),
+                                input + i - 32,
+                                input + i - 16,
+                                secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
                                 seed);
         }
         /* last bytes */
@@ -5306,7 +6638,7 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
                             input + len - 16,
                             input + len - 32,
                             secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
-                            0ULL - seed);
+                            (XXH64_hash_t)0 - seed);
 
         {   XXH128_hash_t h128;
             h128.low64  = acc.low64 + acc.high64;
@@ -5323,12 +6655,12 @@ XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
 XXH_FORCE_INLINE XXH128_hash_t
 XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
                             const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_f_accumulate_512 f_acc512,
+                            XXH3_f_accumulate f_acc,
                             XXH3_f_scrambleAcc f_scramble)
 {
     XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
 
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc512, f_scramble);
+    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
 
     /* converge into final hash */
     XXH_STATIC_ASSERT(sizeof(acc) == 64);
@@ -5346,47 +6678,50 @@ XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
 }
 
 /*
- * It's important for performance that XXH3_hashLong is not inlined.
+ * It's important for performance that XXH3_hashLong() is not inlined.
  */
-XXH_NO_INLINE XXH128_hash_t
+XXH_NO_INLINE XXH_PUREF XXH128_hash_t
 XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
                            XXH64_hash_t seed64,
                            const void* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64; (void)secret; (void)secretLen;
     return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
-                                       XXH3_accumulate_512, XXH3_scrambleAcc);
+                                       XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 /*
- * It's important for performance to pass @secretLen (when it's static)
+ * It's important for performance to pass @p secretLen (when it's static)
  * to the compiler, so that it can properly optimize the vectorized loop.
+ *
+ * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
+ * breaks -Og, this is XXH_NO_INLINE.
  */
-XXH_FORCE_INLINE XXH128_hash_t
+XXH3_WITH_SECRET_INLINE XXH128_hash_t
 XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
                               XXH64_hash_t seed64,
                               const void* XXH_RESTRICT secret, size_t secretLen)
 {
     (void)seed64;
     return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
-                                       XXH3_accumulate_512, XXH3_scrambleAcc);
+                                       XXH3_accumulate, XXH3_scrambleAcc);
 }
 
 XXH_FORCE_INLINE XXH128_hash_t
 XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
                                 XXH64_hash_t seed64,
-                                XXH3_f_accumulate_512 f_acc512,
+                                XXH3_f_accumulate f_acc,
                                 XXH3_f_scrambleAcc f_scramble,
                                 XXH3_f_initCustomSecret f_initSec)
 {
     if (seed64 == 0)
         return XXH3_hashLong_128b_internal(input, len,
                                            XXH3_kSecret, sizeof(XXH3_kSecret),
-                                           f_acc512, f_scramble);
+                                           f_acc, f_scramble);
     {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
         f_initSec(secret, seed64);
         return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
-                                           f_acc512, f_scramble);
+                                           f_acc, f_scramble);
     }
 }
 
@@ -5399,7 +6734,7 @@ XXH3_hashLong_128b_withSeed(const void* input, size_t len,
 {
     (void)secret; (void)secretLen;
     return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
-                XXH3_accumulate_512, XXH3_scrambleAcc, XXH3_initCustomSecret);
+                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
 }
 
 typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
@@ -5429,94 +6764,93 @@ XXH3_128bits_internal(const void* input, size_t len,
 
 /* ===   Public XXH128 API   === */
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(const void* input, size_t len)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
 {
     return XXH3_128bits_internal(input, len, 0,
                                  XXH3_kSecret, sizeof(XXH3_kSecret),
                                  XXH3_hashLong_128b_default);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecret(const void* input, size_t len, const void* secret, size_t secretSize)
+XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
     return XXH3_128bits_internal(input, len, 0,
                                  (const xxh_u8*)secret, secretSize,
                                  XXH3_hashLong_128b_withSecret);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSeed(const void* input, size_t len, XXH64_hash_t seed)
+XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
 {
     return XXH3_128bits_internal(input, len, seed,
                                  XXH3_kSecret, sizeof(XXH3_kSecret),
                                  XXH3_hashLong_128b_withSeed);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecretandSeed(const void* input, size_t len, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
 {
     if (len <= XXH3_MIDSIZE_MAX)
         return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
     return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH128(const void* input, size_t len, XXH64_hash_t seed)
+XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
 {
     return XXH3_128bits_withSeed(input, len, seed);
 }
 
 
 /* ===   XXH3 128-bit streaming   === */
-
+#ifndef XXH_NO_STREAM
 /*
  * All initialization and update functions are identical to 64-bit streaming variant.
  * The only difference is the finalization routine.
  */
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset(XXH3_state_t* statePtr)
+XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
 {
     return XXH3_64bits_reset(statePtr);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecret(XXH3_state_t* statePtr, const void* secret, size_t secretSize)
+XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
 {
     return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSeed(XXH3_state_t* statePtr, XXH64_hash_t seed)
+XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
 {
     return XXH3_64bits_reset_withSeed(statePtr, seed);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH3_state_t* statePtr, const void* secret, size_t secretSize, XXH64_hash_t seed)
+XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
 {
     return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_update(XXH3_state_t* state, const void* input, size_t len)
+XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
 {
-    return XXH3_update(state, (const xxh_u8*)input, len,
-                       XXH3_accumulate_512, XXH3_scrambleAcc);
+    return XXH3_64bits_update(state, input, len);
 }
 
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
 {
     const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
     if (state->totalLen > XXH3_MIDSIZE_MAX) {
@@ -5540,13 +6874,13 @@ XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (const XXH3_state_t* state)
     return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
                                    secret, state->secretLimit + XXH_STRIPE_LEN);
 }
-
+#endif /* !XXH_NO_STREAM */
 /* 128-bit utility functions */
 
 #include <string.h>   /* memcmp, memcpy */
 
 /* return : 1 is equal, 0 if different */
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
 {
     /* note : XXH128_hash_t is compact, it has no padding byte */
@@ -5554,11 +6888,11 @@ XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
 }
 
 /* This prototype is compatible with stdlib's qsort().
- * return : >0 if *h128_1  > *h128_2
- *          <0 if *h128_1  < *h128_2
- *          =0 if *h128_1 == *h128_2  */
-/*! @ingroup xxh3_family */
-XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
+ * @return : >0 if *h128_1  > *h128_2
+ *           <0 if *h128_1  < *h128_2
+ *           =0 if *h128_1 == *h128_2  */
+/*! @ingroup XXH3_family */
+XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
 {
     XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
     XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
@@ -5570,9 +6904,9 @@ XXH_PUBLIC_API int XXH128_cmp(const void* h128_1, const void* h128_2)
 
 
 /*======   Canonical representation   ======*/
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API void
-XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
+XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
 {
     XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
     if (XXH_CPU_LITTLE_ENDIAN) {
@@ -5583,9 +6917,9 @@ XXH128_canonicalFromHash(XXH128_canonical_t* dst, XXH128_hash_t hash)
     XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH128_hash_t
-XXH128_hashFromCanonical(const XXH128_canonical_t* src)
+XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
 {
     XXH128_hash_t h;
     h.high64 = XXH_readBE64(src);
@@ -5607,9 +6941,9 @@ XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
     XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API XXH_errorcode
-XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSeed, size_t customSeedSize)
+XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
 {
 #if (XXH_DEBUGLEVEL >= 1)
     XXH_ASSERT(secretBuffer != NULL);
@@ -5652,9 +6986,9 @@ XXH3_generateSecret(void* secretBuffer, size_t secretSize, const void* customSee
     return XXH_OK;
 }
 
-/*! @ingroup xxh3_family */
+/*! @ingroup XXH3_family */
 XXH_PUBLIC_API void
-XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
+XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
 {
     XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
     XXH3_initCustomSecret(secret, seed);
@@ -5667,7 +7001,7 @@ XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
 /* Pop our optimization override from above */
 #if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
   && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) /* respect -O0 and -Os */
+  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
 #  pragma GCC pop_options
 #endif
 
@@ -5682,5 +7016,5 @@ XXH3_generateSecret_fromSeed(void* secretBuffer, XXH64_hash_t seed)
 
 
 #if defined (__cplusplus)
-}
+} /* extern "C" */
 #endif
diff --git a/thirdparty/zstd/common/zstd_internal.h b/thirdparty/zstd/common/zstd_internal.h
index 1f942f27bf..ecb9cfba87 100644
--- a/thirdparty/zstd/common/zstd_internal.h
+++ b/thirdparty/zstd/common/zstd_internal.h
@@ -178,7 +178,7 @@ static void ZSTD_copy8(void* dst, const void* src) {
     ZSTD_memcpy(dst, src, 8);
 #endif
 }
-#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+#define COPY8(d,s) do { ZSTD_copy8(d,s); d+=8; s+=8; } while (0)
 
 /* Need to use memmove here since the literal buffer can now be located within
    the dst buffer. In circumstances where the op "catches up" to where the
@@ -198,7 +198,7 @@ static void ZSTD_copy16(void* dst, const void* src) {
     ZSTD_memcpy(dst, copy16_buf, 16);
 #endif
 }
-#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
+#define COPY16(d,s) do { ZSTD_copy16(d,s); d+=16; s+=16; } while (0)
 
 #define WILDCOPY_OVERLENGTH 32
 #define WILDCOPY_VECLEN 16
@@ -227,7 +227,7 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
     if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
         /* Handle short offset copies. */
         do {
-            COPY8(op, ip)
+            COPY8(op, ip);
         } while (op < oend);
     } else {
         assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
@@ -366,13 +366,13 @@ typedef struct {
 
 /*! ZSTD_getcBlockSize() :
  *  Provides the size of compressed block from block header `src` */
-/* Used by: decompress, fullbench (does not get its definition from here) */
+/*  Used by: decompress, fullbench */
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                           blockProperties_t* bpPtr);
 
 /*! ZSTD_decodeSeqHeaders() :
  *  decode sequence header from src */
-/* Used by: decompress, fullbench (does not get its definition from here) */
+/*  Used by: zstd_decompress_block, fullbench */
 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                        const void* src, size_t srcSize);
 
diff --git a/thirdparty/zstd/compress/fse_compress.c b/thirdparty/zstd/compress/fse_compress.c
index 5d3770808d..1ce3cf16ac 100644
--- a/thirdparty/zstd/compress/fse_compress.c
+++ b/thirdparty/zstd/compress/fse_compress.c
@@ -25,7 +25,7 @@
 #include "../common/error_private.h"
 #define ZSTD_DEPS_NEED_MALLOC
 #define ZSTD_DEPS_NEED_MATH64
-#include "../common/zstd_deps.h"  /* ZSTD_malloc, ZSTD_free, ZSTD_memcpy, ZSTD_memset */
+#include "../common/zstd_deps.h"  /* ZSTD_memset */
 #include "../common/bits.h" /* ZSTD_highbit32 */
 
 
@@ -225,8 +225,8 @@ size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
     size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
                                    + 4 /* bitCount initialized at 4 */
                                    + 2 /* first two symbols may use one additional bit each */) / 8)
-                                    + 1 /* round up to whole nb bytes */
-                                    + 2 /* additional two bytes for bitstream flush */;
+                                   + 1 /* round up to whole nb bytes */
+                                   + 2 /* additional two bytes for bitstream flush */;
     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
 }
 
@@ -255,7 +255,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
     /* Init */
     remaining = tableSize+1;   /* +1 for extra accuracy */
     threshold = tableSize;
-    nbBits = tableLog+1;
+    nbBits = (int)tableLog+1;
 
     while ((symbol < alphabetSize) && (remaining>1)) {  /* stops at 1 */
         if (previousIs0) {
@@ -274,7 +274,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
             }
             while (symbol >= start+3) {
                 start+=3;
-                bitStream += 3 << bitCount;
+                bitStream += 3U << bitCount;
                 bitCount += 2;
             }
             bitStream += (symbol-start) << bitCount;
@@ -294,7 +294,7 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
             count++;   /* +1 for extra accuracy */
             if (count>=threshold)
                 count += max;   /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
-            bitStream += count << bitCount;
+            bitStream += (U32)count << bitCount;
             bitCount  += nbBits;
             bitCount  -= (count<max);
             previousIs0  = (count==1);
@@ -322,7 +322,8 @@ FSE_writeNCount_generic (void* header, size_t headerBufferSize,
     out[1] = (BYTE)(bitStream>>8);
     out+= (bitCount+7) /8;
 
-    return (out-ostart);
+    assert(out >= ostart);
+    return (size_t)(out-ostart);
 }
 
 
diff --git a/thirdparty/zstd/compress/huf_compress.c b/thirdparty/zstd/compress/huf_compress.c
index 29871877a7..ea00072320 100644
--- a/thirdparty/zstd/compress/huf_compress.c
+++ b/thirdparty/zstd/compress/huf_compress.c
@@ -220,6 +220,25 @@ static void HUF_setValue(HUF_CElt* elt, size_t value)
     }
 }
 
+HUF_CTableHeader HUF_readCTableHeader(HUF_CElt const* ctable)
+{
+    HUF_CTableHeader header;
+    ZSTD_memcpy(&header, ctable, sizeof(header));
+    return header;
+}
+
+static void HUF_writeCTableHeader(HUF_CElt* ctable, U32 tableLog, U32 maxSymbolValue)
+{
+    HUF_CTableHeader header;
+    HUF_STATIC_ASSERT(sizeof(ctable[0]) == sizeof(header));
+    ZSTD_memset(&header, 0, sizeof(header));
+    assert(tableLog < 256);
+    header.tableLog = (BYTE)tableLog;
+    assert(maxSymbolValue < 256);
+    header.maxSymbolValue = (BYTE)maxSymbolValue;
+    ZSTD_memcpy(ctable, &header, sizeof(header));
+}
+
 typedef struct {
     HUF_CompressWeightsWksp wksp;
     BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];   /* precomputed conversion table */
@@ -237,6 +256,9 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
 
     HUF_STATIC_ASSERT(HUF_CTABLE_WORKSPACE_SIZE >= sizeof(HUF_WriteCTableWksp));
 
+    assert(HUF_readCTableHeader(CTable).maxSymbolValue == maxSymbolValue);
+    assert(HUF_readCTableHeader(CTable).tableLog == huffLog);
+
     /* check conditions */
     if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
     if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge);
@@ -283,7 +305,9 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
 
-    CTable[0] = tableLog;
+    *maxSymbolValuePtr = nbSymbols - 1;
+
+    HUF_writeCTableHeader(CTable, tableLog, *maxSymbolValuePtr);
 
     /* Prepare base value per rank */
     {   U32 n, nextRankStart = 0;
@@ -315,7 +339,6 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
         { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
     }
 
-    *maxSymbolValuePtr = nbSymbols - 1;
     return readSize;
 }
 
@@ -323,6 +346,8 @@ U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
 {
     const HUF_CElt* const ct = CTable + 1;
     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
+    if (symbolValue > HUF_readCTableHeader(CTable).maxSymbolValue)
+        return 0;
     return (U32)HUF_getNbBits(ct[symbolValue]);
 }
 
@@ -723,7 +748,8 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
         HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
     for (n=0; n<alphabetSize; n++)
         HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
-    CTable[0] = maxNbBits;
+
+    HUF_writeCTableHeader(CTable, maxNbBits, maxSymbolValue);
 }
 
 size_t
@@ -776,13 +802,20 @@ size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count,
 }
 
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
-  HUF_CElt const* ct = CTable + 1;
-  int bad = 0;
-  int s;
-  for (s = 0; s <= (int)maxSymbolValue; ++s) {
-    bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
-  }
-  return !bad;
+    HUF_CTableHeader header = HUF_readCTableHeader(CTable);
+    HUF_CElt const* ct = CTable + 1;
+    int bad = 0;
+    int s;
+
+    assert(header.tableLog <= HUF_TABLELOG_ABSOLUTEMAX);
+
+    if (header.maxSymbolValue < maxSymbolValue)
+        return 0;
+
+    for (s = 0; s <= (int)maxSymbolValue; ++s) {
+        bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
+    }
+    return !bad;
 }
 
 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
@@ -1024,17 +1057,17 @@ HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable)
 {
-    U32 const tableLog = (U32)CTable[0];
+    U32 const tableLog = HUF_readCTableHeader(CTable).tableLog;
     HUF_CElt const* ct = CTable + 1;
     const BYTE* ip = (const BYTE*) src;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
-    BYTE* op = ostart;
     HUF_CStream_t bitC;
 
     /* init */
     if (dstSize < 8) return 0;   /* not enough space to compress */
-    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
+    { BYTE* op = ostart;
+      size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
       if (HUF_isError(initErr)) return 0; }
 
     if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
@@ -1255,7 +1288,7 @@ unsigned HUF_optimalTableLog(
 
     {   BYTE* dst = (BYTE*)workSpace + sizeof(HUF_WriteCTableWksp);
         size_t dstSize = wkspSize - sizeof(HUF_WriteCTableWksp);
-        size_t maxBits, hSize, newSize;
+        size_t hSize, newSize;
         const unsigned symbolCardinality = HUF_cardinality(count, maxSymbolValue);
         const unsigned minTableLog = HUF_minTableLog(symbolCardinality);
         size_t optSize = ((size_t) ~0) - 1;
@@ -1266,12 +1299,14 @@ unsigned HUF_optimalTableLog(
         /* Search until size increases */
         for (optLogGuess = minTableLog; optLogGuess <= maxTableLog; optLogGuess++) {
             DEBUGLOG(7, "checking for huffLog=%u", optLogGuess);
-            maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
-            if (ERR_isError(maxBits)) continue;
 
-            if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
+            {   size_t maxBits = HUF_buildCTable_wksp(table, count, maxSymbolValue, optLogGuess, workSpace, wkspSize);
+                if (ERR_isError(maxBits)) continue;
+
+                if (maxBits < optLogGuess && optLogGuess > minTableLog) break;
 
-            hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
+                hSize = HUF_writeCTable_wksp(dst, dstSize, table, maxSymbolValue, (U32)maxBits, workSpace, wkspSize);
+            }
 
             if (ERR_isError(hSize)) continue;
 
@@ -1372,12 +1407,6 @@ HUF_compress_internal (void* dst, size_t dstSize,
         huffLog = (U32)maxBits;
         DEBUGLOG(6, "bit distribution completed (%zu symbols)", showCTableBits(table->CTable + 1, maxSymbolValue+1));
     }
-    /* Zero unused symbols in CTable, so we can check it for validity */
-    {
-        size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
-        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
-        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
-    }
 
     /* Write table description header */
     {   CHECK_V_F(hSize, HUF_writeCTable_wksp(op, dstSize, table->CTable, maxSymbolValue, huffLog,
@@ -1420,7 +1449,7 @@ size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
 /* HUF_compress4X_repeat():
  * compress input using 4 streams.
  * consider skipping quickly
- * re-use an existing huffman compression table */
+ * reuse an existing huffman compression table */
 size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                       const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
diff --git a/thirdparty/zstd/compress/zstd_compress.c b/thirdparty/zstd/compress/zstd_compress.c
index d6133e70b4..9284e2a480 100644
--- a/thirdparty/zstd/compress/zstd_compress.c
+++ b/thirdparty/zstd/compress/zstd_compress.c
@@ -178,6 +178,7 @@ static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
 
 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
 {
+    DEBUGLOG(3, "ZSTD_freeCCtx (address: %p)", (void*)cctx);
     if (cctx==NULL) return 0;   /* support free on NULL */
     RETURN_ERROR_IF(cctx->staticSize, memory_allocation,
                     "not compatible with static CCtx");
@@ -649,10 +650,11 @@ static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value)
     return 0;
 }
 
-#define BOUNDCHECK(cParam, val) { \
-    RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \
-                    parameter_outOfBound, "Param out of bounds"); \
-}
+#define BOUNDCHECK(cParam, val)                                       \
+    do {                                                              \
+        RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val),        \
+                        parameter_outOfBound, "Param out of bounds"); \
+    } while (0)
 
 
 static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
@@ -868,7 +870,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
 #else
         FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value), "");
         CCtxParams->nbWorkers = value;
-        return CCtxParams->nbWorkers;
+        return (size_t)(CCtxParams->nbWorkers);
 #endif
 
     case ZSTD_c_jobSize :
@@ -892,7 +894,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
 #else
         FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
         CCtxParams->overlapLog = value;
-        return CCtxParams->overlapLog;
+        return (size_t)CCtxParams->overlapLog;
 #endif
 
     case ZSTD_c_rsyncable :
@@ -902,7 +904,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
 #else
         FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value), "");
         CCtxParams->rsyncable = value;
-        return CCtxParams->rsyncable;
+        return (size_t)CCtxParams->rsyncable;
 #endif
 
     case ZSTD_c_enableDedicatedDictSearch :
@@ -939,8 +941,10 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         return CCtxParams->ldmParams.hashRateLog;
 
     case ZSTD_c_targetCBlockSize :
-        if (value!=0)   /* 0 ==> default */
+        if (value!=0) {  /* 0 ==> default */
+            value = MAX(value, ZSTD_TARGETCBLOCKSIZE_MIN);
             BOUNDCHECK(ZSTD_c_targetCBlockSize, value);
+        }
         CCtxParams->targetCBlockSize = (U32)value;
         return CCtxParams->targetCBlockSize;
 
@@ -968,7 +972,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
     case ZSTD_c_validateSequences:
         BOUNDCHECK(ZSTD_c_validateSequences, value);
         CCtxParams->validateSequences = value;
-        return CCtxParams->validateSequences;
+        return (size_t)CCtxParams->validateSequences;
 
     case ZSTD_c_useBlockSplitter:
         BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
@@ -983,7 +987,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
     case ZSTD_c_deterministicRefPrefix:
         BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
         CCtxParams->deterministicRefPrefix = !!value;
-        return CCtxParams->deterministicRefPrefix;
+        return (size_t)CCtxParams->deterministicRefPrefix;
 
     case ZSTD_c_prefetchCDictTables:
         BOUNDCHECK(ZSTD_c_prefetchCDictTables, value);
@@ -993,7 +997,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
     case ZSTD_c_enableSeqProducerFallback:
         BOUNDCHECK(ZSTD_c_enableSeqProducerFallback, value);
         CCtxParams->enableMatchFinderFallback = value;
-        return CCtxParams->enableMatchFinderFallback;
+        return (size_t)CCtxParams->enableMatchFinderFallback;
 
     case ZSTD_c_maxBlockSize:
         if (value!=0)    /* 0 ==> default */
@@ -1363,7 +1367,6 @@ size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset)
         RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
                         "Reset parameters is only possible during init stage.");
         ZSTD_clearAllDicts(cctx);
-        ZSTD_memset(&cctx->externalMatchCtx, 0, sizeof(cctx->externalMatchCtx));
         return ZSTD_CCtxParams_reset(&cctx->requestedParams);
     }
     return 0;
@@ -1391,11 +1394,12 @@ size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
 static ZSTD_compressionParameters
 ZSTD_clampCParams(ZSTD_compressionParameters cParams)
 {
-#   define CLAMP_TYPE(cParam, val, type) {                                \
-        ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
-        if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
-        else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
-    }
+#   define CLAMP_TYPE(cParam, val, type)                                      \
+        do {                                                                  \
+            ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam);         \
+            if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound;      \
+            else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \
+        } while (0)
 #   define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned)
     CLAMP(ZSTD_c_windowLog, cParams.windowLog);
     CLAMP(ZSTD_c_chainLog,  cParams.chainLog);
@@ -1467,6 +1471,48 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
     const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
     assert(ZSTD_checkCParams(cPar)==0);
 
+    /* Cascade the selected strategy down to the next-highest one built into
+     * this binary. */
+#ifdef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+    if (cPar.strategy == ZSTD_btultra2) {
+        cPar.strategy = ZSTD_btultra;
+    }
+    if (cPar.strategy == ZSTD_btultra) {
+        cPar.strategy = ZSTD_btopt;
+    }
+#endif
+#ifdef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
+    if (cPar.strategy == ZSTD_btopt) {
+        cPar.strategy = ZSTD_btlazy2;
+    }
+#endif
+#ifdef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+    if (cPar.strategy == ZSTD_btlazy2) {
+        cPar.strategy = ZSTD_lazy2;
+    }
+#endif
+#ifdef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+    if (cPar.strategy == ZSTD_lazy2) {
+        cPar.strategy = ZSTD_lazy;
+    }
+#endif
+#ifdef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+    if (cPar.strategy == ZSTD_lazy) {
+        cPar.strategy = ZSTD_greedy;
+    }
+#endif
+#ifdef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
+    if (cPar.strategy == ZSTD_greedy) {
+        cPar.strategy = ZSTD_dfast;
+    }
+#endif
+#ifdef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
+    if (cPar.strategy == ZSTD_dfast) {
+        cPar.strategy = ZSTD_fast;
+        cPar.targetLength = 0;
+    }
+#endif
+
     switch (mode) {
     case ZSTD_cpm_unknown:
     case ZSTD_cpm_noAttachDict:
@@ -1617,8 +1663,8 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
       + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
       + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
       + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
-      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
-      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+      + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_aligned_alloc_size(ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
     size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
                                             ? ZSTD_cwksp_aligned_alloc_size(hSize)
                                             : 0;
@@ -1707,7 +1753,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
      * be needed. However, we still allocate two 0-sized buffers, which can
      * take space under ASAN. */
     return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
 }
 
 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
@@ -1768,7 +1814,7 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
 
         return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
             &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
-            ZSTD_CONTENTSIZE_UNKNOWN, params->useSequenceProducer, params->maxBlockSize);
+            ZSTD_CONTENTSIZE_UNKNOWN, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
     }
 }
 
@@ -2001,8 +2047,8 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
         ms->opt.litLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxLL+1) * sizeof(unsigned));
         ms->opt.matchLengthFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxML+1) * sizeof(unsigned));
         ms->opt.offCodeFreq = (unsigned*)ZSTD_cwksp_reserve_aligned(ws, (MaxOff+1) * sizeof(unsigned));
-        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t));
-        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+        ms->opt.matchTable = (ZSTD_match_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_match_t));
+        ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, ZSTD_OPT_SIZE * sizeof(ZSTD_optimal_t));
     }
 
     ms->cParams = *cParams;
@@ -2074,7 +2120,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
 
     {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
         size_t const blockSize = MIN(params->maxBlockSize, windowSize);
-        size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, params->useSequenceProducer);
+        size_t const maxNbSeq = ZSTD_maxNbSeq(blockSize, params->cParams.minMatch, ZSTD_hasExtSeqProd(params));
         size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
                 ? ZSTD_compressBound(blockSize) + 1
                 : 0;
@@ -2091,8 +2137,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         size_t const neededSpace =
             ZSTD_estimateCCtxSize_usingCCtxParams_internal(
                 &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
-                buffInSize, buffOutSize, pledgedSrcSize, params->useSequenceProducer, params->maxBlockSize);
-        int resizeWorkspace;
+                buffInSize, buffOutSize, pledgedSrcSize, ZSTD_hasExtSeqProd(params), params->maxBlockSize);
 
         FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
 
@@ -2101,7 +2146,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         {   /* Check if workspace is large enough, alloc a new one if needed */
             int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
             int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
-            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
+            int resizeWorkspace = workspaceTooSmall || workspaceWasteful;
             DEBUGLOG(4, "Need %zu B workspace", neededSpace);
             DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
 
@@ -2176,10 +2221,10 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         }
 
         /* reserve space for block-level external sequences */
-        if (params->useSequenceProducer) {
+        if (ZSTD_hasExtSeqProd(params)) {
             size_t const maxNbExternalSeq = ZSTD_sequenceBound(blockSize);
-            zc->externalMatchCtx.seqBufferCapacity = maxNbExternalSeq;
-            zc->externalMatchCtx.seqBuffer =
+            zc->extSeqBufCapacity = maxNbExternalSeq;
+            zc->extSeqBuf =
                 (ZSTD_Sequence*)ZSTD_cwksp_reserve_aligned(ws, maxNbExternalSeq * sizeof(ZSTD_Sequence));
         }
 
@@ -2564,7 +2609,7 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa
     assert(size < (1U<<31));   /* can be casted to int */
 
 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
-    /* To validate that the table re-use logic is sound, and that we don't
+    /* To validate that the table reuse logic is sound, and that we don't
      * access table space that we haven't cleaned, we re-"poison" the table
      * space every time we mark it dirty.
      *
@@ -2992,40 +3037,43 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS
     static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
         { ZSTD_compressBlock_fast  /* default for 0 */,
           ZSTD_compressBlock_fast,
-          ZSTD_compressBlock_doubleFast,
-          ZSTD_compressBlock_greedy,
-          ZSTD_compressBlock_lazy,
-          ZSTD_compressBlock_lazy2,
-          ZSTD_compressBlock_btlazy2,
-          ZSTD_compressBlock_btopt,
-          ZSTD_compressBlock_btultra,
-          ZSTD_compressBlock_btultra2 },
+          ZSTD_COMPRESSBLOCK_DOUBLEFAST,
+          ZSTD_COMPRESSBLOCK_GREEDY,
+          ZSTD_COMPRESSBLOCK_LAZY,
+          ZSTD_COMPRESSBLOCK_LAZY2,
+          ZSTD_COMPRESSBLOCK_BTLAZY2,
+          ZSTD_COMPRESSBLOCK_BTOPT,
+          ZSTD_COMPRESSBLOCK_BTULTRA,
+          ZSTD_COMPRESSBLOCK_BTULTRA2
+        },
         { ZSTD_compressBlock_fast_extDict  /* default for 0 */,
           ZSTD_compressBlock_fast_extDict,
-          ZSTD_compressBlock_doubleFast_extDict,
-          ZSTD_compressBlock_greedy_extDict,
-          ZSTD_compressBlock_lazy_extDict,
-          ZSTD_compressBlock_lazy2_extDict,
-          ZSTD_compressBlock_btlazy2_extDict,
-          ZSTD_compressBlock_btopt_extDict,
-          ZSTD_compressBlock_btultra_extDict,
-          ZSTD_compressBlock_btultra_extDict },
+          ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT,
+          ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT,
+          ZSTD_COMPRESSBLOCK_LAZY_EXTDICT,
+          ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT,
+          ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT,
+          ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT,
+          ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT,
+          ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT
+        },
         { ZSTD_compressBlock_fast_dictMatchState  /* default for 0 */,
           ZSTD_compressBlock_fast_dictMatchState,
-          ZSTD_compressBlock_doubleFast_dictMatchState,
-          ZSTD_compressBlock_greedy_dictMatchState,
-          ZSTD_compressBlock_lazy_dictMatchState,
-          ZSTD_compressBlock_lazy2_dictMatchState,
-          ZSTD_compressBlock_btlazy2_dictMatchState,
-          ZSTD_compressBlock_btopt_dictMatchState,
-          ZSTD_compressBlock_btultra_dictMatchState,
-          ZSTD_compressBlock_btultra_dictMatchState },
+          ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE,
+          ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE,
+          ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE,
+          ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE,
+          ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE,
+          ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE,
+          ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE,
+          ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE
+        },
         { NULL  /* default for 0 */,
           NULL,
           NULL,
-          ZSTD_compressBlock_greedy_dedicatedDictSearch,
-          ZSTD_compressBlock_lazy_dedicatedDictSearch,
-          ZSTD_compressBlock_lazy2_dedicatedDictSearch,
+          ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH,
+          ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH,
+          ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH,
           NULL,
           NULL,
           NULL,
@@ -3038,18 +3086,26 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramS
     DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
     if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
         static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
-            { ZSTD_compressBlock_greedy_row,
-            ZSTD_compressBlock_lazy_row,
-            ZSTD_compressBlock_lazy2_row },
-            { ZSTD_compressBlock_greedy_extDict_row,
-            ZSTD_compressBlock_lazy_extDict_row,
-            ZSTD_compressBlock_lazy2_extDict_row },
-            { ZSTD_compressBlock_greedy_dictMatchState_row,
-            ZSTD_compressBlock_lazy_dictMatchState_row,
-            ZSTD_compressBlock_lazy2_dictMatchState_row },
-            { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
-            ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
-            ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
+            {
+                ZSTD_COMPRESSBLOCK_GREEDY_ROW,
+                ZSTD_COMPRESSBLOCK_LAZY_ROW,
+                ZSTD_COMPRESSBLOCK_LAZY2_ROW
+            },
+            {
+                ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW,
+                ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW,
+                ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW
+            },
+            {
+                ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW,
+                ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW,
+                ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW
+            },
+            {
+                ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW,
+                ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW,
+                ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW
+            }
         };
         DEBUGLOG(4, "Selecting a row-based matchfinder");
         assert(useRowMatchFinder != ZSTD_ps_auto);
@@ -3192,7 +3248,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
             /* External matchfinder + LDM is technically possible, just not implemented yet.
              * We need to revisit soon and implement it. */
             RETURN_ERROR_IF(
-                zc->appliedParams.useSequenceProducer,
+                ZSTD_hasExtSeqProd(&zc->appliedParams),
                 parameter_combination_unsupported,
                 "Long-distance matching with external sequence producer enabled is not currently supported."
             );
@@ -3211,7 +3267,7 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
             /* External matchfinder + LDM is technically possible, just not implemented yet.
              * We need to revisit soon and implement it. */
             RETURN_ERROR_IF(
-                zc->appliedParams.useSequenceProducer,
+                ZSTD_hasExtSeqProd(&zc->appliedParams),
                 parameter_combination_unsupported,
                 "Long-distance matching with external sequence producer enabled is not currently supported."
             );
@@ -3230,18 +3286,18 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                                        zc->appliedParams.useRowMatchFinder,
                                        src, srcSize);
             assert(ldmSeqStore.pos == ldmSeqStore.size);
-        } else if (zc->appliedParams.useSequenceProducer) {
+        } else if (ZSTD_hasExtSeqProd(&zc->appliedParams)) {
             assert(
-                zc->externalMatchCtx.seqBufferCapacity >= ZSTD_sequenceBound(srcSize)
+                zc->extSeqBufCapacity >= ZSTD_sequenceBound(srcSize)
             );
-            assert(zc->externalMatchCtx.mFinder != NULL);
+            assert(zc->appliedParams.extSeqProdFunc != NULL);
 
             {   U32 const windowSize = (U32)1 << zc->appliedParams.cParams.windowLog;
 
-                size_t const nbExternalSeqs = (zc->externalMatchCtx.mFinder)(
-                    zc->externalMatchCtx.mState,
-                    zc->externalMatchCtx.seqBuffer,
-                    zc->externalMatchCtx.seqBufferCapacity,
+                size_t const nbExternalSeqs = (zc->appliedParams.extSeqProdFunc)(
+                    zc->appliedParams.extSeqProdState,
+                    zc->extSeqBuf,
+                    zc->extSeqBufCapacity,
                     src, srcSize,
                     NULL, 0,  /* dict and dictSize, currently not supported */
                     zc->appliedParams.compressionLevel,
@@ -3249,21 +3305,21 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                 );
 
                 size_t const nbPostProcessedSeqs = ZSTD_postProcessSequenceProducerResult(
-                    zc->externalMatchCtx.seqBuffer,
+                    zc->extSeqBuf,
                     nbExternalSeqs,
-                    zc->externalMatchCtx.seqBufferCapacity,
+                    zc->extSeqBufCapacity,
                     srcSize
                 );
 
                 /* Return early if there is no error, since we don't need to worry about last literals */
                 if (!ZSTD_isError(nbPostProcessedSeqs)) {
                     ZSTD_sequencePosition seqPos = {0,0,0};
-                    size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs);
+                    size_t const seqLenSum = ZSTD_fastSequenceLengthSum(zc->extSeqBuf, nbPostProcessedSeqs);
                     RETURN_ERROR_IF(seqLenSum > srcSize, externalSequences_invalid, "External sequences imply too large a block!");
                     FORWARD_IF_ERROR(
                         ZSTD_copySequencesToSeqStoreExplicitBlockDelim(
                             zc, &seqPos,
-                            zc->externalMatchCtx.seqBuffer, nbPostProcessedSeqs,
+                            zc->extSeqBuf, nbPostProcessedSeqs,
                             src, srcSize,
                             zc->appliedParams.searchForExternalRepcodes
                         ),
@@ -3280,9 +3336,11 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                 }
 
                 /* Fallback to software matchfinder */
-                {   ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
-                                                                                            zc->appliedParams.useRowMatchFinder,
-                                                                                            dictMode);
+                {   ZSTD_blockCompressor const blockCompressor =
+                        ZSTD_selectBlockCompressor(
+                            zc->appliedParams.cParams.strategy,
+                            zc->appliedParams.useRowMatchFinder,
+                            dictMode);
                     ms->ldmSeqStore = NULL;
                     DEBUGLOG(
                         5,
@@ -3292,9 +3350,10 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                     lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
             }   }
         } else {   /* not long range mode and no external matchfinder */
-            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
-                                                                                    zc->appliedParams.useRowMatchFinder,
-                                                                                    dictMode);
+            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(
+                    zc->appliedParams.cParams.strategy,
+                    zc->appliedParams.useRowMatchFinder,
+                    dictMode);
             ms->ldmSeqStore = NULL;
             lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
         }
@@ -3304,29 +3363,38 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
     return ZSTDbss_compress;
 }
 
-static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
+static size_t ZSTD_copyBlockSequences(SeqCollector* seqCollector, const seqStore_t* seqStore, const U32 prevRepcodes[ZSTD_REP_NUM])
 {
-    const seqStore_t* seqStore = ZSTD_getSeqStore(zc);
-    const seqDef* seqStoreSeqs = seqStore->sequencesStart;
-    size_t seqStoreSeqSize = seqStore->sequences - seqStoreSeqs;
-    size_t seqStoreLiteralsSize = (size_t)(seqStore->lit - seqStore->litStart);
-    size_t literalsRead = 0;
-    size_t lastLLSize;
+    const seqDef* inSeqs = seqStore->sequencesStart;
+    const size_t nbInSequences = seqStore->sequences - inSeqs;
+    const size_t nbInLiterals = (size_t)(seqStore->lit - seqStore->litStart);
 
-    ZSTD_Sequence* outSeqs = &zc->seqCollector.seqStart[zc->seqCollector.seqIndex];
+    ZSTD_Sequence* outSeqs = seqCollector->seqIndex == 0 ? seqCollector->seqStart : seqCollector->seqStart + seqCollector->seqIndex;
+    const size_t nbOutSequences = nbInSequences + 1;
+    size_t nbOutLiterals = 0;
+    repcodes_t repcodes;
     size_t i;
-    repcodes_t updatedRepcodes;
 
-    assert(zc->seqCollector.seqIndex + 1 < zc->seqCollector.maxSequences);
-    /* Ensure we have enough space for last literals "sequence" */
-    assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
-    ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
-    for (i = 0; i < seqStoreSeqSize; ++i) {
-        U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
-        outSeqs[i].litLength = seqStoreSeqs[i].litLength;
-        outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
+    /* Bounds check that we have enough space for every input sequence
+     * and the block delimiter
+     */
+    assert(seqCollector->seqIndex <= seqCollector->maxSequences);
+    RETURN_ERROR_IF(
+        nbOutSequences > (size_t)(seqCollector->maxSequences - seqCollector->seqIndex),
+        dstSize_tooSmall,
+        "Not enough space to copy sequences");
+
+    ZSTD_memcpy(&repcodes, prevRepcodes, sizeof(repcodes));
+    for (i = 0; i < nbInSequences; ++i) {
+        U32 rawOffset;
+        outSeqs[i].litLength = inSeqs[i].litLength;
+        outSeqs[i].matchLength = inSeqs[i].mlBase + MINMATCH;
         outSeqs[i].rep = 0;
 
+        /* Handle the possible single length >= 64K
+         * There can only be one because we add MINMATCH to every match length,
+         * and blocks are at most 128K.
+         */
         if (i == seqStore->longLengthPos) {
             if (seqStore->longLengthType == ZSTD_llt_literalLength) {
                 outSeqs[i].litLength += 0x10000;
@@ -3335,41 +3403,55 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
             }
         }
 
-        if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
-            /* Derive the correct offset corresponding to a repcode */
-            outSeqs[i].rep = seqStoreSeqs[i].offBase;
+        /* Determine the raw offset given the offBase, which may be a repcode. */
+        if (OFFBASE_IS_REPCODE(inSeqs[i].offBase)) {
+            const U32 repcode = OFFBASE_TO_REPCODE(inSeqs[i].offBase);
+            assert(repcode > 0);
+            outSeqs[i].rep = repcode;
             if (outSeqs[i].litLength != 0) {
-                rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
+                rawOffset = repcodes.rep[repcode - 1];
             } else {
-                if (outSeqs[i].rep == 3) {
-                    rawOffset = updatedRepcodes.rep[0] - 1;
+                if (repcode == 3) {
+                    assert(repcodes.rep[0] > 1);
+                    rawOffset = repcodes.rep[0] - 1;
                 } else {
-                    rawOffset = updatedRepcodes.rep[outSeqs[i].rep];
+                    rawOffset = repcodes.rep[repcode];
                 }
             }
+        } else {
+            rawOffset = OFFBASE_TO_OFFSET(inSeqs[i].offBase);
         }
         outSeqs[i].offset = rawOffset;
-        /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
-           so we provide seqStoreSeqs[i].offset - 1 */
-        ZSTD_updateRep(updatedRepcodes.rep,
-                       seqStoreSeqs[i].offBase,
-                       seqStoreSeqs[i].litLength == 0);
-        literalsRead += outSeqs[i].litLength;
+
+        /* Update repcode history for the sequence */
+        ZSTD_updateRep(repcodes.rep,
+                       inSeqs[i].offBase,
+                       inSeqs[i].litLength == 0);
+
+        nbOutLiterals += outSeqs[i].litLength;
     }
     /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
      * If there are no last literals, then we'll emit (of: 0, ml: 0, ll: 0), which is a marker
      * for the block boundary, according to the API.
      */
-    assert(seqStoreLiteralsSize >= literalsRead);
-    lastLLSize = seqStoreLiteralsSize - literalsRead;
-    outSeqs[i].litLength = (U32)lastLLSize;
-    outSeqs[i].matchLength = outSeqs[i].offset = outSeqs[i].rep = 0;
-    seqStoreSeqSize++;
-    zc->seqCollector.seqIndex += seqStoreSeqSize;
+    assert(nbInLiterals >= nbOutLiterals);
+    {
+        const size_t lastLLSize = nbInLiterals - nbOutLiterals;
+        outSeqs[nbInSequences].litLength = (U32)lastLLSize;
+        outSeqs[nbInSequences].matchLength = 0;
+        outSeqs[nbInSequences].offset = 0;
+        assert(nbOutSequences == nbInSequences + 1);
+    }
+    seqCollector->seqIndex += nbOutSequences;
+    assert(seqCollector->seqIndex <= seqCollector->maxSequences);
+
+    return 0;
 }
 
 size_t ZSTD_sequenceBound(size_t srcSize) {
-    return (srcSize / ZSTD_MINMATCH_MIN) + 1;
+    const size_t maxNbSeq = (srcSize / ZSTD_MINMATCH_MIN) + 1;
+    const size_t maxNbDelims = (srcSize / ZSTD_BLOCKSIZE_MAX_MIN) + 1;
+    return maxNbSeq + maxNbDelims;
 }
 
 size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
@@ -3378,6 +3460,16 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
     const size_t dstCapacity = ZSTD_compressBound(srcSize);
     void* dst = ZSTD_customMalloc(dstCapacity, ZSTD_defaultCMem);
     SeqCollector seqCollector;
+    {
+        int targetCBlockSize;
+        FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_targetCBlockSize, &targetCBlockSize), "");
+        RETURN_ERROR_IF(targetCBlockSize != 0, parameter_unsupported, "targetCBlockSize != 0");
+    }
+    {
+        int nbWorkers;
+        FORWARD_IF_ERROR(ZSTD_CCtx_getParameter(zc, ZSTD_c_nbWorkers, &nbWorkers), "");
+        RETURN_ERROR_IF(nbWorkers != 0, parameter_unsupported, "nbWorkers != 0");
+    }
 
     RETURN_ERROR_IF(dst == NULL, memory_allocation, "NULL pointer!");
 
@@ -3387,8 +3479,12 @@ size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
     seqCollector.maxSequences = outSeqsSize;
     zc->seqCollector = seqCollector;
 
-    ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
-    ZSTD_customFree(dst, ZSTD_defaultCMem);
+    {
+        const size_t ret = ZSTD_compress2(zc, dst, dstCapacity, src, srcSize);
+        ZSTD_customFree(dst, ZSTD_defaultCMem);
+        FORWARD_IF_ERROR(ret, "ZSTD_compress2 failed");
+    }
+    assert(zc->seqCollector.seqIndex <= ZSTD_sequenceBound(srcSize));
     return zc->seqCollector.seqIndex;
 }
 
@@ -3981,8 +4077,9 @@ ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc,
         cSeqsSize = 1;
     }
 
+    /* Sequence collection not supported when block splitting */
     if (zc->seqCollector.collectSequences) {
-        ZSTD_copyBlockSequences(zc);
+        FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, seqStore, dRepOriginal.rep), "copyBlockSequences failed");
         ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
         return 0;
     }
@@ -4204,6 +4301,7 @@ ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
         if (bss == ZSTDbss_noCompress) {
             if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+            RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
             cSize = ZSTD_noCompressBlock(dst, dstCapacity, src, srcSize, lastBlock);
             FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
             DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
@@ -4236,11 +4334,15 @@ ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
 
     {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
         FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
-        if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; }
+        if (bss == ZSTDbss_noCompress) {
+            RETURN_ERROR_IF(zc->seqCollector.collectSequences, sequenceProducer_failed, "Uncompressible block");
+            cSize = 0;
+            goto out;
+        }
     }
 
     if (zc->seqCollector.collectSequences) {
-        ZSTD_copyBlockSequences(zc);
+        FORWARD_IF_ERROR(ZSTD_copyBlockSequences(&zc->seqCollector, ZSTD_getSeqStore(zc), zc->blockState.prevCBlock->rep), "copyBlockSequences failed");
         ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
         return 0;
     }
@@ -4553,19 +4655,15 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
     }
 }
 
-size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
 {
-    RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
-                    "wrong cctx stage");
-    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
-                    parameter_unsupported,
-                    "incompatible with ldm");
+    assert(cctx->stage == ZSTDcs_init);
+    assert(nbSeq == 0 || cctx->appliedParams.ldmParams.enableLdm != ZSTD_ps_enable);
     cctx->externSeqStore.seq = seq;
     cctx->externSeqStore.size = nbSeq;
     cctx->externSeqStore.capacity = nbSeq;
     cctx->externSeqStore.pos = 0;
     cctx->externSeqStore.posInSequence = 0;
-    return 0;
 }
 
 
@@ -4760,12 +4858,19 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
         ZSTD_fillHashTable(ms, iend, dtlm, tfp);
         break;
     case ZSTD_dfast:
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
         ZSTD_fillDoubleHashTable(ms, iend, dtlm, tfp);
+#else
+        assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
         break;
 
     case ZSTD_greedy:
     case ZSTD_lazy:
     case ZSTD_lazy2:
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR)
         assert(srcSize >= HASH_READ_SIZE);
         if (ms->dedicatedDictSearch) {
             assert(ms->chainTable != NULL);
@@ -4782,14 +4887,23 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
                 DEBUGLOG(4, "Using chain-based hash table for lazy dict");
             }
         }
+#else
+        assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
         break;
 
     case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
     case ZSTD_btopt:
     case ZSTD_btultra:
     case ZSTD_btultra2:
+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
         assert(srcSize >= HASH_READ_SIZE);
         ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+#else
+        assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
         break;
 
     default:
@@ -4836,11 +4950,10 @@ size_t ZSTD_loadCEntropy(ZSTD_compressedBlockState_t* bs, void* workspace,
 
         /* We only set the loaded table as valid if it contains all non-zero
          * weights. Otherwise, we set it to check */
-        if (!hasZeroWeights)
+        if (!hasZeroWeights && maxSymbolValue == 255)
             bs->entropy.huf.repeatMode = HUF_repeat_valid;
 
         RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted, "");
-        RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted, "");
         dictPtr += hufHeaderSize;
     }
 
@@ -5107,14 +5220,13 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
 {
     BYTE* const ostart = (BYTE*)dst;
     BYTE* op = ostart;
-    size_t fhSize = 0;
 
     DEBUGLOG(4, "ZSTD_writeEpilogue");
     RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing");
 
     /* special case : empty frame */
     if (cctx->stage == ZSTDcs_init) {
-        fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
+        size_t fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, &cctx->appliedParams, 0, 0);
         FORWARD_IF_ERROR(fhSize, "ZSTD_writeFrameHeader failed");
         dstCapacity -= fhSize;
         op += fhSize;
@@ -5124,8 +5236,9 @@ static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
     if (cctx->stage != ZSTDcs_ending) {
         /* write one last empty block, make it the "last" block */
         U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
-        RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall, "no room for epilogue");
-        MEM_writeLE32(op, cBlockHeader24);
+        ZSTD_STATIC_ASSERT(ZSTD_BLOCKHEADERSIZE == 3);
+        RETURN_ERROR_IF(dstCapacity<3, dstSize_tooSmall, "no room for epilogue");
+        MEM_writeLE24(op, cBlockHeader24);
         op += ZSTD_blockHeaderSize;
         dstCapacity -= ZSTD_blockHeaderSize;
     }
@@ -5455,7 +5568,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced2(
                         cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
                         customMem);
 
-    if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
+    if (!cdict || ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                     dict, dictSize,
                                     dictLoadMethod, dictContentType,
                                     cctxParams) )) {
@@ -5879,7 +5992,7 @@ static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
     if (zcs->appliedParams.inBufferMode == ZSTD_bm_stable) {
         assert(input->pos >= zcs->stableIn_notConsumed);
         input->pos -= zcs->stableIn_notConsumed;
-        ip -= zcs->stableIn_notConsumed;
+        if (ip) ip -= zcs->stableIn_notConsumed;
         zcs->stableIn_notConsumed = 0;
     }
     if (zcs->appliedParams.inBufferMode == ZSTD_bm_buffered) {
@@ -6138,7 +6251,7 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
 #ifdef ZSTD_MULTITHREAD
     /* If external matchfinder is enabled, make sure to fail before checking job size (for consistency) */
     RETURN_ERROR_IF(
-        params.useSequenceProducer == 1 && params.nbWorkers >= 1,
+        ZSTD_hasExtSeqProd(&params) && params.nbWorkers >= 1,
         parameter_combination_unsupported,
         "External sequence producer isn't supported with nbWorkers >= 1"
     );
@@ -6430,7 +6543,7 @@ ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
-                                                cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
+                                                cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
                                                 "Sequence validation failed");
         }
         RETURN_ERROR_IF(idx - seqPos->idx >= cctx->seqStore.maxNbSeq, externalSequences_invalid,
@@ -6568,7 +6681,7 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offBase, matchLength, cctx->appliedParams.cParams.minMatch, seqPos->posInSrc,
-                                                   cctx->appliedParams.cParams.windowLog, dictSize, cctx->appliedParams.useSequenceProducer),
+                                                   cctx->appliedParams.cParams.windowLog, dictSize, ZSTD_hasExtSeqProd(&cctx->appliedParams)),
                                                    "Sequence validation failed");
         }
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offBase, matchLength, litLength);
@@ -7014,19 +7127,27 @@ ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeH
 }
 
 void ZSTD_registerSequenceProducer(
-    ZSTD_CCtx* zc, void* mState,
-    ZSTD_sequenceProducer_F* mFinder
+    ZSTD_CCtx* zc,
+    void* extSeqProdState,
+    ZSTD_sequenceProducer_F extSeqProdFunc
+) {
+    assert(zc != NULL);
+    ZSTD_CCtxParams_registerSequenceProducer(
+        &zc->requestedParams, extSeqProdState, extSeqProdFunc
+    );
+}
+
+void ZSTD_CCtxParams_registerSequenceProducer(
+  ZSTD_CCtx_params* params,
+  void* extSeqProdState,
+  ZSTD_sequenceProducer_F extSeqProdFunc
 ) {
-    if (mFinder != NULL) {
-        ZSTD_externalMatchCtx emctx;
-        emctx.mState = mState;
-        emctx.mFinder = mFinder;
-        emctx.seqBuffer = NULL;
-        emctx.seqBufferCapacity = 0;
-        zc->externalMatchCtx = emctx;
-        zc->requestedParams.useSequenceProducer = 1;
+    assert(params != NULL);
+    if (extSeqProdFunc != NULL) {
+        params->extSeqProdFunc = extSeqProdFunc;
+        params->extSeqProdState = extSeqProdState;
     } else {
-        ZSTD_memset(&zc->externalMatchCtx, 0, sizeof(zc->externalMatchCtx));
-        zc->requestedParams.useSequenceProducer = 0;
+        params->extSeqProdFunc = NULL;
+        params->extSeqProdState = NULL;
     }
 }
diff --git a/thirdparty/zstd/compress/zstd_compress_internal.h b/thirdparty/zstd/compress/zstd_compress_internal.h
index 10f68d010e..e41d7b78ec 100644
--- a/thirdparty/zstd/compress/zstd_compress_internal.h
+++ b/thirdparty/zstd/compress/zstd_compress_internal.h
@@ -39,7 +39,7 @@ extern "C" {
                                        It's not a big deal though : candidate will just be sorted again.
                                        Additionally, candidate position 1 will be lost.
                                        But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
-                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy.
+                                       The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table reuse with a different strategy.
                                        This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
 
 
@@ -159,23 +159,24 @@ typedef struct {
 UNUSED_ATTR static const rawSeqStore_t kNullRawSeqStore = {NULL, 0, 0, 0, 0};
 
 typedef struct {
-    int price;
-    U32 off;
-    U32 mlen;
-    U32 litlen;
-    U32 rep[ZSTD_REP_NUM];
+    int price;  /* price from beginning of segment to this position */
+    U32 off;    /* offset of previous match */
+    U32 mlen;   /* length of previous match */
+    U32 litlen; /* nb of literals since previous match */
+    U32 rep[ZSTD_REP_NUM];  /* offset history after previous match */
 } ZSTD_optimal_t;
 
 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
 
+#define ZSTD_OPT_SIZE (ZSTD_OPT_NUM+3)
 typedef struct {
     /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
     unsigned* litFreq;           /* table of literals statistics, of size 256 */
     unsigned* litLengthFreq;     /* table of litLength statistics, of size (MaxLL+1) */
     unsigned* matchLengthFreq;   /* table of matchLength statistics, of size (MaxML+1) */
     unsigned* offCodeFreq;       /* table of offCode statistics, of size (MaxOff+1) */
-    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_NUM+1 */
-    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
+    ZSTD_match_t* matchTable;    /* list of found matches, of size ZSTD_OPT_SIZE */
+    ZSTD_optimal_t* priceTable;  /* All positions tracked by optimal parser, of size ZSTD_OPT_SIZE */
 
     U32  litSum;                 /* nb of literals */
     U32  litLengthSum;           /* nb of litLength codes */
@@ -228,7 +229,7 @@ struct ZSTD_matchState_t {
     U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
     BYTE* tagTable;                          /* For row-based matchFinder: A row-based table containing the hashes and head index. */
     U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
-    U64 hashSalt;                            /* For row-based matchFinder: salts the hash for re-use of tag table */
+    U64 hashSalt;                            /* For row-based matchFinder: salts the hash for reuse of tag table */
     U32 hashSaltEntropy;                     /* For row-based matchFinder: collects entropy for salt generation */
 
     U32* hashTable;
@@ -360,10 +361,11 @@ struct ZSTD_CCtx_params_s {
      * if the external matchfinder returns an error code. */
     int enableMatchFinderFallback;
 
-    /* Indicates whether an external matchfinder has been referenced.
-     * Users can't set this externally.
-     * It is set internally in ZSTD_registerSequenceProducer(). */
-    int useSequenceProducer;
+    /* Parameters for the external sequence producer API.
+     * Users set these parameters through ZSTD_registerSequenceProducer().
+     * It is not possible to set these parameters individually through the public API. */
+    void* extSeqProdState;
+    ZSTD_sequenceProducer_F extSeqProdFunc;
 
     /* Adjust the max block size*/
     size_t maxBlockSize;
@@ -401,14 +403,6 @@ typedef struct {
     ZSTD_entropyCTablesMetadata_t entropyMetadata;
 } ZSTD_blockSplitCtx;
 
-/* Context for block-level external matchfinder API */
-typedef struct {
-  void* mState;
-  ZSTD_sequenceProducer_F* mFinder;
-  ZSTD_Sequence* seqBuffer;
-  size_t seqBufferCapacity;
-} ZSTD_externalMatchCtx;
-
 struct ZSTD_CCtx_s {
     ZSTD_compressionStage_e stage;
     int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
@@ -479,8 +473,9 @@ struct ZSTD_CCtx_s {
     /* Workspace for block splitter */
     ZSTD_blockSplitCtx blockSplitCtx;
 
-    /* Workspace for external matchfinder */
-    ZSTD_externalMatchCtx externalMatchCtx;
+    /* Buffer for output from external sequence producer */
+    ZSTD_Sequence* extSeqBuf;
+    size_t extSeqBufCapacity;
 };
 
 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
@@ -1053,7 +1048,9 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
  * The least significant cycleLog bits of the indices must remain the same,
  * which may be 0. Every index up to maxDist in the past must be valid.
  */
-MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                            U32 maxDist, void const* src)
 {
     /* preemptive overflow correction:
@@ -1246,7 +1243,9 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
  * forget about the extDict. Handles overlap of the prefix and extDict.
  * Returns non-zero if the segment is contiguous.
  */
-MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
+MEM_STATIC
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_window_update(ZSTD_window_t* window,
                                   void const* src, size_t srcSize,
                                   int forceNonContiguous)
 {
@@ -1467,11 +1466,10 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
  * This cannot be used when long range matching is enabled.
  * Zstd will use these sequences, and pass the literals to a secondary block
  * compressor.
- * @return : An error code on failure.
  * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
  * access and data corruption.
  */
-size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
+void ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
 
 /** ZSTD_cycleLog() :
  *  condition for correct operation : hashLog > 1 */
@@ -1509,6 +1507,10 @@ ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition*
                                    const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                    const void* src, size_t blockSize, ZSTD_paramSwitch_e externalRepSearch);
 
+/* Returns 1 if an external sequence producer is registered, otherwise returns 0. */
+MEM_STATIC int ZSTD_hasExtSeqProd(const ZSTD_CCtx_params* params) {
+    return params->extSeqProdFunc != NULL;
+}
 
 /* ===============================================================
  * Deprecated definitions that are still used internally to avoid
diff --git a/thirdparty/zstd/compress/zstd_compress_superblock.c b/thirdparty/zstd/compress/zstd_compress_superblock.c
index 638c4acbe7..628a2dccd0 100644
--- a/thirdparty/zstd/compress/zstd_compress_superblock.c
+++ b/thirdparty/zstd/compress/zstd_compress_superblock.c
@@ -76,8 +76,8 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
     }
 
     {   int const flags = bmi2 ? HUF_flags_bmi2 : 0;
-        const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, oend-op, literals, litSize, hufTable, flags)
-                                          : HUF_compress4X_usingCTable(op, oend-op, literals, litSize, hufTable, flags);
+        const size_t cSize = singleStream ? HUF_compress1X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags)
+                                          : HUF_compress4X_usingCTable(op, (size_t)(oend-op), literals, litSize, hufTable, flags);
         op += cSize;
         cLitSize += cSize;
         if (cSize == 0 || ERR_isError(cSize)) {
@@ -102,7 +102,7 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
     switch(lhSize)
     {
     case 3: /* 2 - 2 - 10 - 10 */
-        {   U32 const lhc = hType + ((!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
+        {   U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)litSize<<4) + ((U32)cLitSize<<14);
             MEM_writeLE24(ostart, lhc);
             break;
         }
@@ -122,30 +122,30 @@ ZSTD_compressSubBlock_literal(const HUF_CElt* hufTable,
     }
     *entropyWritten = 1;
     DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)litSize, (U32)(op-ostart));
-    return op-ostart;
+    return (size_t)(op-ostart);
 }
 
 static size_t
 ZSTD_seqDecompressedSize(seqStore_t const* seqStore,
-                   const seqDef* sequences, size_t nbSeq,
-                         size_t litSize, int lastSequence)
+                   const seqDef* sequences, size_t nbSeqs,
+                         size_t litSize, int lastSubBlock)
 {
-    const seqDef* const sstart = sequences;
-    const seqDef* const send = sequences + nbSeq;
-    const seqDef* sp = sstart;
     size_t matchLengthSum = 0;
     size_t litLengthSum = 0;
-    (void)(litLengthSum); /* suppress unused variable warning on some environments */
-    while (send-sp > 0) {
-        ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
+    size_t n;
+    for (n=0; n<nbSeqs; n++) {
+        const ZSTD_sequenceLength seqLen = ZSTD_getSequenceLength(seqStore, sequences+n);
         litLengthSum += seqLen.litLength;
         matchLengthSum += seqLen.matchLength;
-        sp++;
     }
-    assert(litLengthSum <= litSize);
-    if (!lastSequence) {
+    DEBUGLOG(5, "ZSTD_seqDecompressedSize: %u sequences from %p: %u literals + %u matchlength",
+                (unsigned)nbSeqs, (const void*)sequences,
+                (unsigned)litLengthSum, (unsigned)matchLengthSum);
+    if (!lastSubBlock)
         assert(litLengthSum == litSize);
-    }
+    else
+        assert(litLengthSum <= litSize);
+    (void)litLengthSum;
     return matchLengthSum + litSize;
 }
 
@@ -180,14 +180,14 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
     /* Sequences Header */
     RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/,
                     dstSize_tooSmall, "");
-    if (nbSeq < 0x7F)
+    if (nbSeq < 128)
         *op++ = (BYTE)nbSeq;
     else if (nbSeq < LONGNBSEQ)
         op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
     else
         op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
     if (nbSeq==0) {
-        return op - ostart;
+        return (size_t)(op - ostart);
     }
 
     /* seqHead : flags for FSE encoding type */
@@ -209,7 +209,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
     }
 
     {   size_t const bitstreamSize = ZSTD_encodeSequences(
-                                        op, oend - op,
+                                        op, (size_t)(oend - op),
                                         fseTables->matchlengthCTable, mlCode,
                                         fseTables->offcodeCTable, ofCode,
                                         fseTables->litlengthCTable, llCode,
@@ -253,7 +253,7 @@ ZSTD_compressSubBlock_sequences(const ZSTD_fseCTables_t* fseTables,
 #endif
 
     *entropyWritten = 1;
-    return op - ostart;
+    return (size_t)(op - ostart);
 }
 
 /** ZSTD_compressSubBlock() :
@@ -279,7 +279,8 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
                 litSize, nbSeq, writeLitEntropy, writeSeqEntropy, lastBlock);
     {   size_t cLitSize = ZSTD_compressSubBlock_literal((const HUF_CElt*)entropy->huf.CTable,
                                                         &entropyMetadata->hufMetadata, literals, litSize,
-                                                        op, oend-op, bmi2, writeLitEntropy, litEntropyWritten);
+                                                        op, (size_t)(oend-op),
+                                                        bmi2, writeLitEntropy, litEntropyWritten);
         FORWARD_IF_ERROR(cLitSize, "ZSTD_compressSubBlock_literal failed");
         if (cLitSize == 0) return 0;
         op += cLitSize;
@@ -289,18 +290,18 @@ static size_t ZSTD_compressSubBlock(const ZSTD_entropyCTables_t* entropy,
                                                   sequences, nbSeq,
                                                   llCode, mlCode, ofCode,
                                                   cctxParams,
-                                                  op, oend-op,
+                                                  op, (size_t)(oend-op),
                                                   bmi2, writeSeqEntropy, seqEntropyWritten);
         FORWARD_IF_ERROR(cSeqSize, "ZSTD_compressSubBlock_sequences failed");
         if (cSeqSize == 0) return 0;
         op += cSeqSize;
     }
     /* Write block header */
-    {   size_t cSize = (op-ostart)-ZSTD_blockHeaderSize;
+    {   size_t cSize = (size_t)(op-ostart) - ZSTD_blockHeaderSize;
         U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
         MEM_writeLE24(ostart, cBlockHeader24);
     }
-    return op-ostart;
+    return (size_t)(op-ostart);
 }
 
 static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t litSize,
@@ -389,7 +390,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
     return cSeqSizeEstimate + sequencesSectionHeaderSize;
 }
 
-static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
+typedef struct {
+    size_t estLitSize;
+    size_t estBlockSize;
+} EstimatedBlockSize;
+static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
                                         const BYTE* ofCodeTable,
                                         const BYTE* llCodeTable,
                                         const BYTE* mlCodeTable,
@@ -397,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize,
                                         const ZSTD_entropyCTables_t* entropy,
                                         const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
                                         void* workspace, size_t wkspSize,
-                                        int writeLitEntropy, int writeSeqEntropy) {
-    size_t cSizeEstimate = 0;
-    cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize,
-                                                         &entropy->huf, &entropyMetadata->hufMetadata,
-                                                         workspace, wkspSize, writeLitEntropy);
-    cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+                                        int writeLitEntropy, int writeSeqEntropy)
+{
+    EstimatedBlockSize ebs;
+    ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize,
+                                                        &entropy->huf, &entropyMetadata->hufMetadata,
+                                                        workspace, wkspSize, writeLitEntropy);
+    ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
                                                          nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
                                                          workspace, wkspSize, writeSeqEntropy);
-    return cSizeEstimate + ZSTD_blockHeaderSize;
+    ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize;
+    return ebs;
 }
 
 static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata)
@@ -419,13 +426,56 @@ static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMe
     return 0;
 }
 
+static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t seqCount)
+{
+    size_t n, total = 0;
+    assert(sp != NULL);
+    for (n=0; n<seqCount; n++) {
+        total += ZSTD_getSequenceLength(seqStore, sp+n).litLength;
+    }
+    DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total);
+    return total;
+}
+
+#define BYTESCALE 256
+
+static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
+                size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
+                int firstSubBlock)
+{
+    size_t n, budget = 0, inSize=0;
+    /* entropy headers */
+    size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
+    assert(firstSubBlock==0 || firstSubBlock==1);
+    budget += headerSize;
+
+    /* first sequence => at least one sequence*/
+    budget += sp[0].litLength * avgLitCost + avgSeqCost;
+    if (budget > targetBudget) return 1;
+    inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);
+
+    /* loop over sequences */
+    for (n=1; n<nbSeqs; n++) {
+        size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
+        budget += currentCost;
+        inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
+        /* stop when sub-block budget is reached */
+        if ( (budget > targetBudget)
+            /* though continue to expand until the sub-block is deemed compressible */
+          && (budget < inSize * BYTESCALE) )
+            break;
+    }
+
+    return n;
+}
+
 /** ZSTD_compressSubBlock_multi() :
  *  Breaks super-block into multiple sub-blocks and compresses them.
- *  Entropy will be written to the first block.
- *  The following blocks will use repeat mode to compress.
- *  All sub-blocks are compressed blocks (no raw or rle blocks).
- *  @return : compressed size of the super block (which is multiple ZSTD blocks)
- *            Or 0 if it failed to compress. */
+ *  Entropy will be written into the first block.
+ *  The following blocks use repeat_mode to compress.
+ *  Sub-blocks are all compressed, except the last one when beneficial.
+ *  @return : compressed size of the super block (which features multiple ZSTD blocks)
+ *            or 0 if it failed to compress. */
 static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
                             const ZSTD_compressedBlockState_t* prevCBlock,
                             ZSTD_compressedBlockState_t* nextCBlock,
@@ -438,10 +488,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
 {
     const seqDef* const sstart = seqStorePtr->sequencesStart;
     const seqDef* const send = seqStorePtr->sequences;
-    const seqDef* sp = sstart;
+    const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */
+    size_t const nbSeqs = (size_t)(send - sstart);
     const BYTE* const lstart = seqStorePtr->litStart;
     const BYTE* const lend = seqStorePtr->lit;
     const BYTE* lp = lstart;
+    size_t const nbLiterals = (size_t)(lend - lstart);
     BYTE const* ip = (BYTE const*)src;
     BYTE const* const iend = ip + srcSize;
     BYTE* const ostart = (BYTE*)dst;
@@ -450,96 +502,152 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
     const BYTE* llCodePtr = seqStorePtr->llCode;
     const BYTE* mlCodePtr = seqStorePtr->mlCode;
     const BYTE* ofCodePtr = seqStorePtr->ofCode;
-    size_t targetCBlockSize = cctxParams->targetCBlockSize;
-    size_t litSize, seqCount;
-    int writeLitEntropy = entropyMetadata->hufMetadata.hType == set_compressed;
+    size_t const minTarget = ZSTD_TARGETCBLOCKSIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */
+    size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize);
+    int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed);
     int writeSeqEntropy = 1;
-    int lastSequence = 0;
-
-    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (litSize=%u, nbSeq=%u)",
-                (unsigned)(lend-lp), (unsigned)(send-sstart));
-
-    litSize = 0;
-    seqCount = 0;
-    do {
-        size_t cBlockSizeEstimate = 0;
-        if (sstart == send) {
-            lastSequence = 1;
-        } else {
-            const seqDef* const sequence = sp + seqCount;
-            lastSequence = sequence == send - 1;
-            litSize += ZSTD_getSequenceLength(seqStorePtr, sequence).litLength;
-            seqCount++;
-        }
-        if (lastSequence) {
-            assert(lp <= lend);
-            assert(litSize <= (size_t)(lend - lp));
-            litSize = (size_t)(lend - lp);
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)",
+               (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart));
+
+        /* let's start by a general estimation for the full block */
+    if (nbSeqs > 0) {
+        EstimatedBlockSize const ebs =
+                ZSTD_estimateSubBlockSize(lp, nbLiterals,
+                                        ofCodePtr, llCodePtr, mlCodePtr, nbSeqs,
+                                        &nextCBlock->entropy, entropyMetadata,
+                                        workspace, wkspSize,
+                                        writeLitEntropy, writeSeqEntropy);
+        /* quick estimation */
+        size_t const avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE;
+        size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs;
+        const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1);
+        size_t n, avgBlockBudget, blockBudgetSupp=0;
+        avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks;
+        DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes",
+                    (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE,
+                    (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE);
+        /* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately
+         * this will result in the production of a single uncompressed block covering @srcSize.*/
+        if (ebs.estBlockSize > srcSize) return 0;
+
+        /* compress and write sub-blocks */
+        assert(nbSubBlocks>0);
+        for (n=0; n < nbSubBlocks-1; n++) {
+            /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
+            size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
+                                        avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
+            /* if reached last sequence : break to last sub-block (simplification) */
+            assert(seqCount <= (size_t)(send-sp));
+            if (sp + seqCount == send) break;
+            assert(seqCount > 0);
+            /* compress sub-block */
+            {   int litEntropyWritten = 0;
+                int seqEntropyWritten = 0;
+                size_t litSize = countLiterals(seqStorePtr, sp, seqCount);
+                const size_t decompressedSize =
+                        ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0);
+                size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+                                                sp, seqCount,
+                                                lp, litSize,
+                                                llCodePtr, mlCodePtr, ofCodePtr,
+                                                cctxParams,
+                                                op, (size_t)(oend-op),
+                                                bmi2, writeLitEntropy, writeSeqEntropy,
+                                                &litEntropyWritten, &seqEntropyWritten,
+                                                0);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+
+                /* check compressibility, update state components */
+                if (cSize > 0 && cSize < decompressedSize) {
+                    DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes",
+                                (unsigned)decompressedSize, (unsigned)cSize);
+                    assert(ip + decompressedSize <= iend);
+                    ip += decompressedSize;
+                    lp += litSize;
+                    op += cSize;
+                    llCodePtr += seqCount;
+                    mlCodePtr += seqCount;
+                    ofCodePtr += seqCount;
+                    /* Entropy only needs to be written once */
+                    if (litEntropyWritten) {
+                        writeLitEntropy = 0;
+                    }
+                    if (seqEntropyWritten) {
+                        writeSeqEntropy = 0;
+                    }
+                    sp += seqCount;
+                    blockBudgetSupp = 0;
+            }   }
+            /* otherwise : do not compress yet, coalesce current sub-block with following one */
         }
-        /* I think there is an optimization opportunity here.
-         * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
-         * since it recalculates estimate from scratch.
-         * For example, it would recount literal distribution and symbol codes every time.
-         */
-        cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
-                                                       &nextCBlock->entropy, entropyMetadata,
-                                                       workspace, wkspSize, writeLitEntropy, writeSeqEntropy);
-        if (cBlockSizeEstimate > targetCBlockSize || lastSequence) {
-            int litEntropyWritten = 0;
-            int seqEntropyWritten = 0;
-            const size_t decompressedSize = ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSequence);
-            const size_t cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
-                                                       sp, seqCount,
-                                                       lp, litSize,
-                                                       llCodePtr, mlCodePtr, ofCodePtr,
-                                                       cctxParams,
-                                                       op, oend-op,
-                                                       bmi2, writeLitEntropy, writeSeqEntropy,
-                                                       &litEntropyWritten, &seqEntropyWritten,
-                                                       lastBlock && lastSequence);
-            FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
-            if (cSize > 0 && cSize < decompressedSize) {
-                DEBUGLOG(5, "Committed the sub-block");
-                assert(ip + decompressedSize <= iend);
-                ip += decompressedSize;
-                sp += seqCount;
-                lp += litSize;
-                op += cSize;
-                llCodePtr += seqCount;
-                mlCodePtr += seqCount;
-                ofCodePtr += seqCount;
-                litSize = 0;
-                seqCount = 0;
-                /* Entropy only needs to be written once */
-                if (litEntropyWritten) {
-                    writeLitEntropy = 0;
-                }
-                if (seqEntropyWritten) {
-                    writeSeqEntropy = 0;
-                }
+    } /* if (nbSeqs > 0) */
+
+    /* write last block */
+    DEBUGLOG(5, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp));
+    {   int litEntropyWritten = 0;
+        int seqEntropyWritten = 0;
+        size_t litSize = (size_t)(lend - lp);
+        size_t seqCount = (size_t)(send - sp);
+        const size_t decompressedSize =
+                ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1);
+        size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata,
+                                            sp, seqCount,
+                                            lp, litSize,
+                                            llCodePtr, mlCodePtr, ofCodePtr,
+                                            cctxParams,
+                                            op, (size_t)(oend-op),
+                                            bmi2, writeLitEntropy, writeSeqEntropy,
+                                            &litEntropyWritten, &seqEntropyWritten,
+                                            lastBlock);
+        FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed");
+
+        /* update pointers, the nb of literals borrowed from next sequence must be preserved */
+        if (cSize > 0 && cSize < decompressedSize) {
+            DEBUGLOG(5, "Last sub-block compressed %u bytes => %u bytes",
+                        (unsigned)decompressedSize, (unsigned)cSize);
+            assert(ip + decompressedSize <= iend);
+            ip += decompressedSize;
+            lp += litSize;
+            op += cSize;
+            llCodePtr += seqCount;
+            mlCodePtr += seqCount;
+            ofCodePtr += seqCount;
+            /* Entropy only needs to be written once */
+            if (litEntropyWritten) {
+                writeLitEntropy = 0;
+            }
+            if (seqEntropyWritten) {
+                writeSeqEntropy = 0;
             }
+            sp += seqCount;
         }
-    } while (!lastSequence);
+    }
+
+
     if (writeLitEntropy) {
-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has literal entropy tables unwritten");
+        DEBUGLOG(5, "Literal entropy tables were never written");
         ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf));
     }
     if (writeSeqEntropy && ZSTD_needSequenceEntropyTables(&entropyMetadata->fseMetadata)) {
         /* If we haven't written our entropy tables, then we've violated our contract and
          * must emit an uncompressed block.
          */
-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi has sequence entropy tables unwritten");
+        DEBUGLOG(5, "Sequence entropy tables were never written => cancel, emit an uncompressed block");
         return 0;
     }
+
     if (ip < iend) {
-        size_t const cSize = ZSTD_noCompressBlock(op, oend - op, ip, iend - ip, lastBlock);
-        DEBUGLOG(5, "ZSTD_compressSubBlock_multi last sub-block uncompressed, %zu bytes", (size_t)(iend - ip));
+        /* some data left : last part of the block sent uncompressed */
+        size_t const rSize = (size_t)((iend - ip));
+        size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock);
+        DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize));
         FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
         assert(cSize != 0);
         op += cSize;
         /* We have to regenerate the repcodes because we've skipped some sequences */
         if (sp < send) {
-            seqDef const* seq;
+            const seqDef* seq;
             repcodes_t rep;
             ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
             for (seq = sstart; seq < sp; ++seq) {
@@ -548,14 +656,17 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
             ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
         }
     }
-    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed");
-    return op-ostart;
+
+    DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u",
+                (unsigned)(op-ostart));
+    return (size_t)(op-ostart);
 }
 
 size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
                                void* dst, size_t dstCapacity,
-                               void const* src, size_t srcSize,
-                               unsigned lastBlock) {
+                               const void* src, size_t srcSize,
+                               unsigned lastBlock)
+{
     ZSTD_entropyCTablesMetadata_t entropyMetadata;
 
     FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
diff --git a/thirdparty/zstd/compress/zstd_cwksp.h b/thirdparty/zstd/compress/zstd_cwksp.h
index cc7fb1c715..3eddbd334e 100644
--- a/thirdparty/zstd/compress/zstd_cwksp.h
+++ b/thirdparty/zstd/compress/zstd_cwksp.h
@@ -192,6 +192,7 @@ MEM_STATIC void ZSTD_cwksp_assert_internal_consistency(ZSTD_cwksp* ws) {
     {
         intptr_t const offset = __msan_test_shadow(ws->initOnceStart,
             (U8*)ZSTD_cwksp_initialAllocStart(ws) - (U8*)ws->initOnceStart);
+        (void)offset;
 #if defined(ZSTD_MSAN_PRINT)
         if(offset!=-1) {
             __msan_print_shadow((U8*)ws->initOnceStart + offset - 8, 32);
@@ -433,7 +434,7 @@ MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
 
 /**
  * Aligned on 64 bytes. These buffers have the special property that
- * their values remain constrained, allowing us to re-use them without
+ * their values remain constrained, allowing us to reuse them without
  * memset()-ing them.
  */
 MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
@@ -525,7 +526,7 @@ MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
     DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
 
 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
-    /* To validate that the table re-use logic is sound, and that we don't
+    /* To validate that the table reuse logic is sound, and that we don't
      * access table space that we haven't cleaned, we re-"poison" the table
      * space every time we mark it dirty.
      * Since tableValidEnd space and initOnce space may overlap we don't poison
@@ -602,9 +603,9 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
     DEBUGLOG(4, "cwksp: clearing!");
 
 #if ZSTD_MEMORY_SANITIZER && !defined (ZSTD_MSAN_DONT_POISON_WORKSPACE)
-    /* To validate that the context re-use logic is sound, and that we don't
+    /* To validate that the context reuse logic is sound, and that we don't
      * access stuff that this compression hasn't initialized, we re-"poison"
-     * the workspace except for the areas in which we expect memory re-use
+     * the workspace except for the areas in which we expect memory reuse
      * without initialization (objects, valid tables area and init once
      * memory). */
     {
@@ -635,6 +636,15 @@ MEM_STATIC void ZSTD_cwksp_clear(ZSTD_cwksp* ws) {
     ZSTD_cwksp_assert_internal_consistency(ws);
 }
 
+MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
+}
+
+MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
+    return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
+         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
+}
+
 /**
  * The provided workspace takes ownership of the buffer [start, start+size).
  * Any existing values in the workspace are ignored (the previously managed
@@ -666,6 +676,11 @@ MEM_STATIC size_t ZSTD_cwksp_create(ZSTD_cwksp* ws, size_t size, ZSTD_customMem
 MEM_STATIC void ZSTD_cwksp_free(ZSTD_cwksp* ws, ZSTD_customMem customMem) {
     void *ptr = ws->workspace;
     DEBUGLOG(4, "cwksp: freeing workspace");
+#if ZSTD_MEMORY_SANITIZER && !defined(ZSTD_MSAN_DONT_POISON_WORKSPACE)
+    if (ptr != NULL && customMem.customFree != NULL) {
+        __msan_unpoison(ptr, ZSTD_cwksp_sizeof(ws));
+    }
+#endif
     ZSTD_memset(ws, 0, sizeof(ZSTD_cwksp));
     ZSTD_customFree(ptr, customMem);
 }
@@ -679,15 +694,6 @@ MEM_STATIC void ZSTD_cwksp_move(ZSTD_cwksp* dst, ZSTD_cwksp* src) {
     ZSTD_memset(src, 0, sizeof(ZSTD_cwksp));
 }
 
-MEM_STATIC size_t ZSTD_cwksp_sizeof(const ZSTD_cwksp* ws) {
-    return (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->workspace);
-}
-
-MEM_STATIC size_t ZSTD_cwksp_used(const ZSTD_cwksp* ws) {
-    return (size_t)((BYTE*)ws->tableEnd - (BYTE*)ws->workspace)
-         + (size_t)((BYTE*)ws->workspaceEnd - (BYTE*)ws->allocStart);
-}
-
 MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
     return ws->allocFailed;
 }
diff --git a/thirdparty/zstd/compress/zstd_double_fast.c b/thirdparty/zstd/compress/zstd_double_fast.c
index 0ad88ffc7b..a4e9c50d3b 100644
--- a/thirdparty/zstd/compress/zstd_double_fast.c
+++ b/thirdparty/zstd/compress/zstd_double_fast.c
@@ -11,7 +11,11 @@
 #include "zstd_compress_internal.h"
 #include "zstd_double_fast.h"
 
-static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
+
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -47,7 +51,9 @@ static void ZSTD_fillDoubleHashTableForCDict(ZSTD_matchState_t* ms,
     }   }
 }
 
-static void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillDoubleHashTableForCCtx(ZSTD_matchState_t* ms,
                               void const* end, ZSTD_dictTableLoadMethod_e dtlm)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -95,6 +101,7 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
 
 
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_doubleFast_noDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls /* template */)
@@ -305,6 +312,7 @@ _match_stored:
 
 
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
@@ -348,8 +356,8 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
     if (ms->prefetchCDictTables) {
         size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
         size_t const chainTableBytes = (((size_t)1) << dictCParams->chainLog) * sizeof(U32);
-        PREFETCH_AREA(dictHashLong, hashTableBytes)
-        PREFETCH_AREA(dictHashSmall, chainTableBytes)
+        PREFETCH_AREA(dictHashLong, hashTableBytes);
+        PREFETCH_AREA(dictHashSmall, chainTableBytes);
     }
 
     /* init */
@@ -589,7 +597,9 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
 }
 
 
-static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_doubleFast_extDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
         U32 const mls /* template */)
@@ -756,3 +766,5 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
         return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
     }
 }
+
+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
diff --git a/thirdparty/zstd/compress/zstd_double_fast.h b/thirdparty/zstd/compress/zstd_double_fast.h
index 6f0047c4ba..ce6ed8c97f 100644
--- a/thirdparty/zstd/compress/zstd_double_fast.h
+++ b/thirdparty/zstd/compress/zstd_double_fast.h
@@ -18,9 +18,12 @@ extern "C" {
 #include "../common/mem.h"      /* U32 */
 #include "zstd_compress_internal.h"     /* ZSTD_CCtx, size_t */
 
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
+
 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
                               void const* end, ZSTD_dictTableLoadMethod_e dtlm,
                               ZSTD_tableFillPurpose_e tfp);
+
 size_t ZSTD_compressBlock_doubleFast(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
@@ -31,6 +34,14 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST ZSTD_compressBlock_doubleFast
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE ZSTD_compressBlock_doubleFast_dictMatchState
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT ZSTD_compressBlock_doubleFast_extDict
+#else
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST NULL
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_DOUBLEFAST_EXTDICT NULL
+#endif /* ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR */
 
 #if defined (__cplusplus)
 }
diff --git a/thirdparty/zstd/compress/zstd_fast.c b/thirdparty/zstd/compress/zstd_fast.c
index 5f2c6a2eda..6c4554cfca 100644
--- a/thirdparty/zstd/compress/zstd_fast.c
+++ b/thirdparty/zstd/compress/zstd_fast.c
@@ -11,7 +11,9 @@
 #include "zstd_compress_internal.h"  /* ZSTD_hashPtr, ZSTD_count, ZSTD_storeSeq */
 #include "zstd_fast.h"
 
-static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm)
 {
@@ -46,7 +48,9 @@ static void ZSTD_fillHashTableForCDict(ZSTD_matchState_t* ms,
                 }   }   }   }
 }
 
-static void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_fillHashTableForCCtx(ZSTD_matchState_t* ms,
                         const void* const end,
                         ZSTD_dictTableLoadMethod_e dtlm)
 {
@@ -139,8 +143,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
  *
  * This is also the work we do at the beginning to enter the loop initially.
  */
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_fast_noDict_generic(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_fast_noDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
         U32 const mls, U32 const hasStep)
@@ -456,6 +461,7 @@ size_t ZSTD_compressBlock_fast(
 }
 
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
@@ -502,7 +508,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
 
     if (ms->prefetchCDictTables) {
         size_t const hashTableBytes = (((size_t)1) << dictCParams->hashLog) * sizeof(U32);
-        PREFETCH_AREA(dictHashTable, hashTableBytes)
+        PREFETCH_AREA(dictHashTable, hashTableBytes);
     }
 
     /* init */
@@ -681,7 +687,9 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
 }
 
 
-static size_t ZSTD_compressBlock_fast_extDict_generic(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_fast_extDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
diff --git a/thirdparty/zstd/compress/zstd_lazy.c b/thirdparty/zstd/compress/zstd_lazy.c
index 5ba88e8678..67dd55fdb8 100644
--- a/thirdparty/zstd/compress/zstd_lazy.c
+++ b/thirdparty/zstd/compress/zstd_lazy.c
@@ -12,6 +12,11 @@
 #include "zstd_lazy.h"
 #include "../common/bits.h" /* ZSTD_countTrailingZeros64 */
 
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
+
 #define kLazySkippingStep 8
 
 
@@ -19,8 +24,9 @@
 *  Binary Tree search
 ***************************************/
 
-static void
-ZSTD_updateDUBT(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_updateDUBT(ZSTD_matchState_t* ms,
                 const BYTE* ip, const BYTE* iend,
                 U32 mls)
 {
@@ -63,8 +69,9 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
  *  sort one already inserted but unsorted position
  *  assumption : curr >= btlow == (curr - btmask)
  *  doesn't fail */
-static void
-ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
                  U32 curr, const BYTE* inputEnd,
                  U32 nbCompares, U32 btLow,
                  const ZSTD_dictMode_e dictMode)
@@ -152,8 +159,9 @@ ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
 }
 
 
-static size_t
-ZSTD_DUBT_findBetterDictMatch (
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_DUBT_findBetterDictMatch (
         const ZSTD_matchState_t* ms,
         const BYTE* const ip, const BYTE* const iend,
         size_t* offsetPtr,
@@ -230,8 +238,9 @@ ZSTD_DUBT_findBetterDictMatch (
 }
 
 
-static size_t
-ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
                         const BYTE* const ip, const BYTE* const iend,
                         size_t* offBasePtr,
                         U32 const mls,
@@ -381,8 +390,9 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
 
 
 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
                 const BYTE* const ip, const BYTE* const iLimit,
                       size_t* offBasePtr,
                 const U32 mls /* template */,
@@ -617,7 +627,9 @@ size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nb
 
 /* Update chains up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertAndFindFirstIndex_internal(
                         ZSTD_matchState_t* ms,
                         const ZSTD_compressionParameters* const cParams,
                         const BYTE* ip, U32 const mls, U32 const lazySkipping)
@@ -651,6 +663,7 @@ U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
 
 /* inlining is important to hardwire a hot branch (template emulation) */
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_HcFindBestMatch(
                         ZSTD_matchState_t* ms,
                         const BYTE* const ip, const BYTE* const iLimit,
@@ -819,7 +832,9 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, BYTE const* t
  * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
  * but not beyond iLimit.
  */
-FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
                                    U32 const rowLog, U32 const mls,
                                    U32 idx, const BYTE* const iLimit)
 {
@@ -845,7 +860,9 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const B
  * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
  * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
  */
-FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
                                                   BYTE const* tagTable, BYTE const* base,
                                                   U32 idx, U32 const hashLog,
                                                   U32 const rowLog, U32 const mls,
@@ -863,10 +880,12 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTab
 /* ZSTD_row_update_internalImpl():
  * Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
  */
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
-                                                        U32 updateStartIdx, U32 const updateEndIdx,
-                                                        U32 const mls, U32 const rowLog,
-                                                        U32 const rowMask, U32 const useCache)
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
+                                  U32 updateStartIdx, U32 const updateEndIdx,
+                                  U32 const mls, U32 const rowLog,
+                                  U32 const rowMask, U32 const useCache)
 {
     U32* const hashTable = ms->hashTable;
     BYTE* const tagTable = ms->tagTable;
@@ -892,9 +911,11 @@ FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
  * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
  * Skips sections of long matches as is necessary.
  */
-FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
-                                                    U32 const mls, U32 const rowLog,
-                                                    U32 const rowMask, U32 const useCache)
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
+                              U32 const mls, U32 const rowLog,
+                              U32 const rowMask, U32 const useCache)
 {
     U32 idx = ms->nextToUpdate;
     const BYTE* const base = ms->window.base;
@@ -1102,20 +1123,21 @@ ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 headGr
 
 /* The high-level approach of the SIMD row based match finder is as follows:
  * - Figure out where to insert the new entry:
- *      - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
- *      - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
+ *      - Generate a hash for current input posistion and split it into a one byte of tag and `rowHashLog` bits of index.
+ *           - The hash is salted by a value that changes on every contex reset, so when the same table is used
+ *             we will avoid collisions that would otherwise slow us down by intorducing phantom matches.
+ *      - The hashTable is effectively split into groups or "rows" of 15 or 31 entries of U32, and the index determines
  *        which row to insert into.
- *      - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
- *        be considered as a circular buffer with a "head" index that resides in the tagTable.
- *      - Also insert the "tag" into the equivalent row and position in the tagTable.
- *          - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
- *                  The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
- *                  for alignment/performance reasons, leaving some bytes unused.
- * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
+ *      - Determine the correct position within the row to insert the entry into. Each row of 15 or 31 can
+ *        be considered as a circular buffer with a "head" index that resides in the tagTable (overall 16 or 32 bytes
+ *        per row).
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte tag calculated for the position and
  *   generate a bitfield that we can cycle through to check the collisions in the hash table.
  * - Pick the longest match.
+ * - Insert the tag into the equivalent row and position in the tagTable.
  */
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_RowFindBestMatch(
                         ZSTD_matchState_t* ms,
                         const BYTE* const ip, const BYTE* const iLimit,
@@ -1489,8 +1511,9 @@ FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
 *  Common parser - lazy strategy
 *********************************/
 
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_lazy_generic(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_compressBlock_lazy_generic(
                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
                         U32 rep[ZSTD_REP_NUM],
                         const void* src, size_t srcSize,
@@ -1754,152 +1777,163 @@ _storeSequence:
     /* Return the last literals size */
     return (size_t)(iend - anchor);
 }
+#endif /* build exclusions */
 
 
-size_t ZSTD_compressBlock_btlazy2(
+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_greedy(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
 }
 
-size_t ZSTD_compressBlock_lazy2(
+size_t ZSTD_compressBlock_greedy_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
 }
 
-size_t ZSTD_compressBlock_lazy(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
 }
 
-size_t ZSTD_compressBlock_greedy(
+size_t ZSTD_compressBlock_greedy_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
 }
 
-size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
 }
 
-size_t ZSTD_compressBlock_lazy2_dictMatchState(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
 }
+#endif
 
-size_t ZSTD_compressBlock_lazy_dictMatchState(
+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict);
 }
 
-size_t ZSTD_compressBlock_greedy_dictMatchState(
+size_t ZSTD_compressBlock_lazy_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState);
 }
 
-
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
 }
 
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dedicatedDictSearch);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
 }
 
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
 }
 
-/* Row-based matchfinder */
-size_t ZSTD_compressBlock_lazy2_row(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
 }
+#endif
 
-size_t ZSTD_compressBlock_lazy_row(
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy2(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict);
 }
 
-size_t ZSTD_compressBlock_greedy_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState);
 }
 
-size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dedicatedDictSearch);
 }
 
-size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy2_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
 }
 
-size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
 }
 
-
 size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
 }
+#endif
 
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btlazy2(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict);
 }
 
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState);
 }
+#endif
 
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_compressBlock_lazy_extDict_generic(
                         ZSTD_matchState_t* ms, seqStore_t* seqStore,
                         U32 rep[ZSTD_REP_NUM],
@@ -2101,8 +2135,9 @@ _storeSequence:
     /* Return the last literals size */
     return (size_t)(iend - anchor);
 }
+#endif /* build exclusions */
 
-
+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
 size_t ZSTD_compressBlock_greedy_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
@@ -2110,48 +2145,55 @@ size_t ZSTD_compressBlock_greedy_extDict(
     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0);
 }
 
-size_t ZSTD_compressBlock_lazy_extDict(
+size_t ZSTD_compressBlock_greedy_extDict_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
-
 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
 }
+#endif
 
-size_t ZSTD_compressBlock_lazy2_extDict(
+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 
 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1);
 }
 
-size_t ZSTD_compressBlock_btlazy2_extDict(
+size_t ZSTD_compressBlock_lazy_extDict_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
 
 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
 }
+#endif
 
-size_t ZSTD_compressBlock_greedy_extDict_row(
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
+
 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2);
 }
 
-size_t ZSTD_compressBlock_lazy_extDict_row(
+size_t ZSTD_compressBlock_lazy2_extDict_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
-
 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
 }
+#endif
 
-size_t ZSTD_compressBlock_lazy2_extDict_row(
+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btlazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
+
 {
-    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
 }
+#endif
diff --git a/thirdparty/zstd/compress/zstd_lazy.h b/thirdparty/zstd/compress/zstd_lazy.h
index 3bde67331e..3635813bdd 100644
--- a/thirdparty/zstd/compress/zstd_lazy.h
+++ b/thirdparty/zstd/compress/zstd_lazy.h
@@ -27,98 +27,173 @@ extern "C" {
 
 #define ZSTD_ROW_HASH_TAG_BITS 8        /* nb bits to use for the tag */
 
+#if !defined(ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR)
 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
 void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
 
 void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
 
 void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue);  /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */
+#endif
 
-size_t ZSTD_compressBlock_btlazy2(
+#ifndef ZSTD_EXCLUDE_GREEDY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_greedy(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2(
+size_t ZSTD_compressBlock_greedy_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy(
+size_t ZSTD_compressBlock_greedy_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy(
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_row(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_row(
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_row(
+size_t ZSTD_compressBlock_greedy_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_btlazy2_dictMatchState(
+size_t ZSTD_compressBlock_greedy_extDict_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dictMatchState(
+
+#define ZSTD_COMPRESSBLOCK_GREEDY ZSTD_compressBlock_greedy
+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW ZSTD_compressBlock_greedy_row
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE ZSTD_compressBlock_greedy_dictMatchState
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW ZSTD_compressBlock_greedy_dictMatchState_row
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH ZSTD_compressBlock_greedy_dedicatedDictSearch
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_greedy_dedicatedDictSearch_row
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT ZSTD_compressBlock_greedy_extDict
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW ZSTD_compressBlock_greedy_extDict_row
+#else
+#define ZSTD_COMPRESSBLOCK_GREEDY NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_ROW NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DICTMATCHSTATE_ROW NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_DEDICATEDDICTSEARCH_ROW NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_GREEDY_EXTDICT_ROW NULL
+#endif
+
+#ifndef ZSTD_EXCLUDE_LAZY_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dictMatchState(
+size_t ZSTD_compressBlock_lazy_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dictMatchState(
+size_t ZSTD_compressBlock_lazy_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
+size_t ZSTD_compressBlock_lazy_extDict_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
+
+#define ZSTD_COMPRESSBLOCK_LAZY ZSTD_compressBlock_lazy
+#define ZSTD_COMPRESSBLOCK_LAZY_ROW ZSTD_compressBlock_lazy_row
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE ZSTD_compressBlock_lazy_dictMatchState
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy_dictMatchState_row
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy_dedicatedDictSearch
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy_dedicatedDictSearch_row
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT ZSTD_compressBlock_lazy_extDict
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW ZSTD_compressBlock_lazy_extDict_row
+#else
+#define ZSTD_COMPRESSBLOCK_LAZY NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DICTMATCHSTATE_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_DEDICATEDDICTSEARCH_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_LAZY_EXTDICT_ROW NULL
+#endif
+
+#ifndef ZSTD_EXCLUDE_LAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_lazy2(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_lazy2_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_greedy_extDict(
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict(
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_lazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_greedy_extDict_row(
+size_t ZSTD_compressBlock_lazy2_extDict_row(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy_extDict_row(
+
+#define ZSTD_COMPRESSBLOCK_LAZY2 ZSTD_compressBlock_lazy2
+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW ZSTD_compressBlock_lazy2_row
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE ZSTD_compressBlock_lazy2_dictMatchState
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW ZSTD_compressBlock_lazy2_dictMatchState_row
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH ZSTD_compressBlock_lazy2_dedicatedDictSearch
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW ZSTD_compressBlock_lazy2_dedicatedDictSearch_row
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT ZSTD_compressBlock_lazy2_extDict
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW ZSTD_compressBlock_lazy2_extDict_row
+#else
+#define ZSTD_COMPRESSBLOCK_LAZY2 NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DICTMATCHSTATE_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_DEDICATEDDICTSEARCH_ROW NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_LAZY2_EXTDICT_ROW NULL
+#endif
+
+#ifndef ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btlazy2(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_lazy2_extDict_row(
+size_t ZSTD_compressBlock_btlazy2_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_btlazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 
+#define ZSTD_COMPRESSBLOCK_BTLAZY2 ZSTD_compressBlock_btlazy2
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE ZSTD_compressBlock_btlazy2_dictMatchState
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT ZSTD_compressBlock_btlazy2_extDict
+#else
+#define ZSTD_COMPRESSBLOCK_BTLAZY2 NULL
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_BTLAZY2_EXTDICT NULL
+#endif
+
 
 #if defined (__cplusplus)
 }
diff --git a/thirdparty/zstd/compress/zstd_ldm.c b/thirdparty/zstd/compress/zstd_ldm.c
index 3d74ff19e3..17c069fe1d 100644
--- a/thirdparty/zstd/compress/zstd_ldm.c
+++ b/thirdparty/zstd/compress/zstd_ldm.c
@@ -246,7 +246,11 @@ static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
         break;
 
     case ZSTD_dfast:
+#ifndef ZSTD_EXCLUDE_DFAST_BLOCK_COMPRESSOR
         ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast, ZSTD_tfp_forCCtx);
+#else
+        assert(0); /* shouldn't be called: cparams should've been adjusted. */
+#endif
         break;
 
     case ZSTD_greedy:
@@ -318,7 +322,9 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
     }
 }
 
-static size_t ZSTD_ldm_generateSequences_internal(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_ldm_generateSequences_internal(
         ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
         ldmParams_t const* params, void const* src, size_t srcSize)
 {
@@ -689,7 +695,6 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
         /* maybeSplitSequence updates rawSeqStore->pos */
         rawSeq const sequence = maybeSplitSequence(rawSeqStore,
                                                    (U32)(iend - ip), minMatch);
-        int i;
         /* End signal */
         if (sequence.offset == 0)
             break;
@@ -702,6 +707,7 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
         /* Run the block compressor */
         DEBUGLOG(5, "pos %u : calling block compressor on segment of size %u", (unsigned)(ip-istart), sequence.litLength);
         {
+            int i;
             size_t const newLitLength =
                 blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
             ip += sequence.litLength;
diff --git a/thirdparty/zstd/compress/zstd_opt.c b/thirdparty/zstd/compress/zstd_opt.c
index f02a760946..e63073e5a4 100644
--- a/thirdparty/zstd/compress/zstd_opt.c
+++ b/thirdparty/zstd/compress/zstd_opt.c
@@ -12,6 +12,9 @@
 #include "hist.h"
 #include "zstd_opt.h"
 
+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
 
 #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
 #define ZSTD_MAX_PRICE     (1<<30)
@@ -264,6 +267,7 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
                                 const optState_t* const optPtr,
                                 int optLevel)
 {
+    DEBUGLOG(8, "ZSTD_rawLiteralsCost (%u literals)", litLength);
     if (litLength == 0) return 0;
 
     if (!ZSTD_compressedLiterals(optPtr))
@@ -402,9 +406,11 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
 
 /* Update hashTable3 up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
-                                              U32* nextToUpdate3,
-                                              const BYTE* const ip)
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
+                                       U32* nextToUpdate3,
+                                       const BYTE* const ip)
 {
     U32* const hashTable3 = ms->hashTable3;
     U32 const hashLog3 = ms->hashLog3;
@@ -431,7 +437,9 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
  * @param ip assumed <= iend-8 .
  * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
  * @return : nb of positions added */
-static U32 ZSTD_insertBt1(
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_insertBt1(
                 const ZSTD_matchState_t* ms,
                 const BYTE* const ip, const BYTE* const iend,
                 U32 const target,
@@ -550,6 +558,7 @@ static U32 ZSTD_insertBt1(
 }
 
 FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 void ZSTD_updateTree_internal(
                 ZSTD_matchState_t* ms,
                 const BYTE* const ip, const BYTE* const iend,
@@ -558,7 +567,7 @@ void ZSTD_updateTree_internal(
     const BYTE* const base = ms->window.base;
     U32 const target = (U32)(ip - base);
     U32 idx = ms->nextToUpdate;
-    DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
+    DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u  (dictMode:%u)",
                 idx, target, dictMode);
 
     while(idx < target) {
@@ -575,7 +584,9 @@ void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
     ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict);
 }
 
-FORCE_INLINE_TEMPLATE U32
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32
 ZSTD_insertBtAndGetAllMatches (
                 ZSTD_match_t* matches,  /* store result (found matches) in this table (presumed large enough) */
                 ZSTD_matchState_t* ms,
@@ -816,7 +827,9 @@ typedef U32 (*ZSTD_getAllMatchesFn)(
     U32 const ll0,
     U32 const lengthToBeat);
 
-FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+U32 ZSTD_btGetAllMatches_internal(
         ZSTD_match_t* matches,
         ZSTD_matchState_t* ms,
         U32* nextToUpdate3,
@@ -1035,11 +1048,6 @@ ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
 *  Optimal parser
 *********************************/
 
-static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
-{
-    return sol.litlen + sol.mlen;
-}
-
 #if 0 /* debug */
 
 static void
@@ -1057,7 +1065,13 @@ listStats(const U32* table, int lastEltID)
 
 #endif
 
-FORCE_INLINE_TEMPLATE size_t
+#define LIT_PRICE(_p) (int)ZSTD_rawLiteralsCost(_p, 1, optStatePtr, optLevel)
+#define LL_PRICE(_l) (int)ZSTD_litLengthPrice(_l, optStatePtr, optLevel)
+#define LL_INCPRICE(_l) (LL_PRICE(_l) - LL_PRICE(_l-1))
+
+FORCE_INLINE_TEMPLATE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t
 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                                seqStore_t* seqStore,
                                U32 rep[ZSTD_REP_NUM],
@@ -1083,10 +1097,10 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
 
     ZSTD_optimal_t* const opt = optStatePtr->priceTable;
     ZSTD_match_t* const matches = optStatePtr->matchTable;
-    ZSTD_optimal_t lastSequence;
+    ZSTD_optimal_t lastStretch;
     ZSTD_optLdm_t optLdm;
 
-    ZSTD_memset(&lastSequence, 0, sizeof(ZSTD_optimal_t));
+    ZSTD_memset(&lastStretch, 0, sizeof(ZSTD_optimal_t));
 
     optLdm.seqStore = ms->ldmSeqStore ? *ms->ldmSeqStore : kNullRawSeqStore;
     optLdm.endPosInBlock = optLdm.startPosInBlock = optLdm.offset = 0;
@@ -1108,19 +1122,31 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             U32 const ll0 = !litlen;
             U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
-                                              (U32)(ip-istart), (U32)(iend - ip));
-            if (!nbMatches) { ip++; continue; }
+                                              (U32)(ip-istart), (U32)(iend-ip));
+            if (!nbMatches) {
+                DEBUGLOG(8, "no match found at cPos %u", (unsigned)(ip-istart));
+                ip++;
+                continue;
+            }
+
+            /* Match found: let's store this solution, and eventually find more candidates.
+             * During this forward pass, @opt is used to store stretches,
+             * defined as "a match followed by N literals".
+             * Note how this is different from a Sequence, which is "N literals followed by a match".
+             * Storing stretches allows us to store different match predecessors
+             * for each literal position part of a literals run. */
 
             /* initialize opt[0] */
-            { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
-            opt[0].mlen = 0;  /* means is_a_literal */
+            opt[0].mlen = 0;  /* there are only literals so far */
             opt[0].litlen = litlen;
-            /* We don't need to include the actual price of the literals because
-             * it is static for the duration of the forward pass, and is included
-             * in every price. We include the literal length to avoid negative
-             * prices when we subtract the previous literal length.
+            /* No need to include the actual price of the literals before the first match
+             * because it is static for the duration of the forward pass, and is included
+             * in every subsequent price. But, we include the literal length because
+             * the cost variation of litlen depends on the value of litlen.
              */
-            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+            opt[0].price = LL_PRICE(litlen);
+            ZSTD_STATIC_ASSERT(sizeof(opt[0].rep[0]) == sizeof(rep[0]));
+            ZSTD_memcpy(&opt[0].rep, rep, sizeof(opt[0].rep));
 
             /* large match -> immediate encoding */
             {   U32 const maxML = matches[nbMatches-1].len;
@@ -1129,82 +1155,106 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                             nbMatches, maxML, maxOffBase, (U32)(ip-prefixStart));
 
                 if (maxML > sufficient_len) {
-                    lastSequence.litlen = litlen;
-                    lastSequence.mlen = maxML;
-                    lastSequence.off = maxOffBase;
-                    DEBUGLOG(6, "large match (%u>%u), immediate encoding",
+                    lastStretch.litlen = 0;
+                    lastStretch.mlen = maxML;
+                    lastStretch.off = maxOffBase;
+                    DEBUGLOG(6, "large match (%u>%u) => immediate encoding",
                                 maxML, sufficient_len);
                     cur = 0;
-                    last_pos = ZSTD_totalLen(lastSequence);
+                    last_pos = maxML;
                     goto _shortestPath;
             }   }
 
             /* set prices for first matches starting position == 0 */
             assert(opt[0].price >= 0);
-            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                U32 pos;
+            {   U32 pos;
                 U32 matchNb;
                 for (pos = 1; pos < minMatch; pos++) {
-                    opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
+                    opt[pos].price = ZSTD_MAX_PRICE;
+                    opt[pos].mlen = 0;
+                    opt[pos].litlen = litlen + pos;
                 }
                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
                     U32 const offBase = matches[matchNb].off;
                     U32 const end = matches[matchNb].len;
                     for ( ; pos <= end ; pos++ ) {
-                        U32 const matchPrice = ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
-                        U32 const sequencePrice = literalsPrice + matchPrice;
+                        int const matchPrice = (int)ZSTD_getMatchPrice(offBase, pos, optStatePtr, optLevel);
+                        int const sequencePrice = opt[0].price + matchPrice;
                         DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
-                                    pos, ZSTD_fCost((int)sequencePrice));
+                                    pos, ZSTD_fCost(sequencePrice));
                         opt[pos].mlen = pos;
                         opt[pos].off = offBase;
-                        opt[pos].litlen = litlen;
-                        opt[pos].price = (int)sequencePrice;
-                }   }
+                        opt[pos].litlen = 0; /* end of match */
+                        opt[pos].price = sequencePrice + LL_PRICE(0);
+                    }
+                }
                 last_pos = pos-1;
+                opt[pos].price = ZSTD_MAX_PRICE;
             }
         }
 
         /* check further positions */
         for (cur = 1; cur <= last_pos; cur++) {
             const BYTE* const inr = ip + cur;
-            assert(cur < ZSTD_OPT_NUM);
-            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
+            assert(cur <= ZSTD_OPT_NUM);
+            DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur);
 
             /* Fix current position with one literal if cheaper */
-            {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
+            {   U32 const litlen = opt[cur-1].litlen + 1;
                 int const price = opt[cur-1].price
-                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
-                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
-                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+                                + LIT_PRICE(ip+cur-1)
+                                + LL_INCPRICE(litlen);
                 assert(price < 1000000000); /* overflow check */
                 if (price <= opt[cur].price) {
+                    ZSTD_optimal_t const prevMatch = opt[cur];
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
                                 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
                                 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
-                    opt[cur].mlen = 0;
-                    opt[cur].off = 0;
+                    opt[cur] = opt[cur-1];
                     opt[cur].litlen = litlen;
                     opt[cur].price = price;
+                    if ( (optLevel >= 1) /* additional check only for higher modes */
+                      && (prevMatch.litlen == 0) /* replace a match */
+                      && (LL_INCPRICE(1) < 0) /* ll1 is cheaper than ll0 */
+                      && LIKELY(ip + cur < iend)
+                    ) {
+                        /* check next position, in case it would be cheaper */
+                        int with1literal = prevMatch.price + LIT_PRICE(ip+cur) + LL_INCPRICE(1);
+                        int withMoreLiterals = price + LIT_PRICE(ip+cur) + LL_INCPRICE(litlen+1);
+                        DEBUGLOG(7, "then at next rPos %u : match+1lit %.2f vs %ulits %.2f",
+                                cur+1, ZSTD_fCost(with1literal), litlen+1, ZSTD_fCost(withMoreLiterals));
+                        if ( (with1literal < withMoreLiterals)
+                          && (with1literal < opt[cur+1].price) ) {
+                            /* update offset history - before it disappears */
+                            U32 const prev = cur - prevMatch.mlen;
+                            repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, prevMatch.off, opt[prev].litlen==0);
+                            assert(cur >= prevMatch.mlen);
+                            DEBUGLOG(7, "==> match+1lit is cheaper (%.2f < %.2f) (hist:%u,%u,%u) !",
+                                        ZSTD_fCost(with1literal), ZSTD_fCost(withMoreLiterals),
+                                        newReps.rep[0], newReps.rep[1], newReps.rep[2] );
+                            opt[cur+1] = prevMatch;  /* mlen & offbase */
+                            ZSTD_memcpy(opt[cur+1].rep, &newReps, sizeof(repcodes_t));
+                            opt[cur+1].litlen = 1;
+                            opt[cur+1].price = with1literal;
+                            if (last_pos < cur+1) last_pos = cur+1;
+                        }
+                    }
                 } else {
-                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
-                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
-                                opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f)",
+                                inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price));
                 }
             }
 
-            /* Set the repcodes of the current position. We must do it here
-             * because we rely on the repcodes of the 2nd to last sequence being
-             * correct to set the next chunks repcodes during the backward
-             * traversal.
+            /* Offset history is not updated during match comparison.
+             * Do it here, now that the match is selected and confirmed.
              */
             ZSTD_STATIC_ASSERT(sizeof(opt[cur].rep) == sizeof(repcodes_t));
             assert(cur >= opt[cur].mlen);
-            if (opt[cur].mlen != 0) {
+            if (opt[cur].litlen == 0) {
+                /* just finished a match => alter offset history */
                 U32 const prev = cur - opt[cur].mlen;
-                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[prev].litlen==0);
                 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
-            } else {
-                ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
             }
 
             /* last match must start at a minimum distance of 8 from oend */
@@ -1214,15 +1264,14 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
 
             if ( (optLevel==0) /*static_test*/
               && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
-                DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
+                DEBUGLOG(7, "skip current position : next rPos(%u) price is cheaper", cur+1);
                 continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
             }
 
             assert(opt[cur].price >= 0);
-            {   U32 const ll0 = (opt[cur].mlen != 0);
-                U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
-                U32 const previousPrice = (U32)opt[cur].price;
-                U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+            {   U32 const ll0 = (opt[cur].litlen == 0);
+                int const previousPrice = opt[cur].price;
+                int const basePrice = previousPrice + LL_PRICE(0);
                 U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                 U32 matchNb;
 
@@ -1234,18 +1283,17 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     continue;
                 }
 
-                {   U32 const maxML = matches[nbMatches-1].len;
-                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
-                                inr-istart, cur, nbMatches, maxML);
-
-                    if ( (maxML > sufficient_len)
-                      || (cur + maxML >= ZSTD_OPT_NUM) ) {
-                        lastSequence.mlen = maxML;
-                        lastSequence.off = matches[nbMatches-1].off;
-                        lastSequence.litlen = litlen;
-                        cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0;  /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
-                        last_pos = cur + ZSTD_totalLen(lastSequence);
-                        if (cur > ZSTD_OPT_NUM) cur = 0;   /* underflow => first match */
+                {   U32 const longestML = matches[nbMatches-1].len;
+                    DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of longest ML=%u",
+                                inr-istart, cur, nbMatches, longestML);
+
+                    if ( (longestML > sufficient_len)
+                      || (cur + longestML >= ZSTD_OPT_NUM)
+                      || (ip + cur + longestML >= iend) ) {
+                        lastStretch.mlen = longestML;
+                        lastStretch.off = matches[nbMatches-1].off;
+                        lastStretch.litlen = 0;
+                        last_pos = cur + longestML;
                         goto _shortestPath;
                 }   }
 
@@ -1257,19 +1305,24 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                     U32 mlen;
 
                     DEBUGLOG(7, "testing match %u => offBase=%4u, mlen=%2u, llen=%2u",
-                                matchNb, matches[matchNb].off, lastML, litlen);
+                                matchNb, matches[matchNb].off, lastML, opt[cur].litlen);
 
                     for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                         U32 const pos = cur + mlen;
-                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+                        int const price = basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
 
                         if ((pos > last_pos) || (price < opt[pos].price)) {
                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
                                         pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
-                            while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }   /* fill empty positions */
+                            while (last_pos < pos) {
+                                /* fill empty positions, for future comparisons */
+                                last_pos++;
+                                opt[last_pos].price = ZSTD_MAX_PRICE;
+                                opt[last_pos].litlen = !0;  /* just needs to be != 0, to mean "not an end of match" */
+                            }
                             opt[pos].mlen = mlen;
                             opt[pos].off = offset;
-                            opt[pos].litlen = litlen;
+                            opt[pos].litlen = 0;
                             opt[pos].price = price;
                         } else {
                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
@@ -1277,47 +1330,81 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                             if (optLevel==0) break;  /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
                         }
             }   }   }
+            opt[last_pos+1].price = ZSTD_MAX_PRICE;
         }  /* for (cur = 1; cur <= last_pos; cur++) */
 
-        lastSequence = opt[last_pos];
-        cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0;  /* single sequence, and it starts before `ip` */
-        assert(cur < ZSTD_OPT_NUM);  /* control overflow*/
+        lastStretch = opt[last_pos];
+        assert(cur >= lastStretch.mlen);
+        cur = last_pos - lastStretch.mlen;
 
 _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
         assert(opt[0].mlen == 0);
+        assert(last_pos >= lastStretch.mlen);
+        assert(cur == last_pos - lastStretch.mlen);
 
-        /* Set the next chunk's repcodes based on the repcodes of the beginning
-         * of the last match, and the last sequence. This avoids us having to
-         * update them while traversing the sequences.
-         */
-        if (lastSequence.mlen != 0) {
-            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
-            ZSTD_memcpy(rep, &reps, sizeof(reps));
+        if (lastStretch.mlen==0) {
+            /* no solution : all matches have been converted into literals */
+            assert(lastStretch.litlen == (ip - anchor) + last_pos);
+            ip += last_pos;
+            continue;
+        }
+        assert(lastStretch.off > 0);
+
+        /* Update offset history */
+        if (lastStretch.litlen == 0) {
+            /* finishing on a match : update offset history */
+            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastStretch.off, opt[cur].litlen==0);
+            ZSTD_memcpy(rep, &reps, sizeof(repcodes_t));
         } else {
-            ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
+            ZSTD_memcpy(rep, lastStretch.rep, sizeof(repcodes_t));
+            assert(cur >= lastStretch.litlen);
+            cur -= lastStretch.litlen;
         }
 
-        {   U32 const storeEnd = cur + 1;
+        /* Let's write the shortest path solution.
+         * It is stored in @opt in reverse order,
+         * starting from @storeEnd (==cur+2),
+         * effectively partially @opt overwriting.
+         * Content is changed too:
+         * - So far, @opt stored stretches, aka a match followed by literals
+         * - Now, it will store sequences, aka literals followed by a match
+         */
+        {   U32 const storeEnd = cur + 2;
             U32 storeStart = storeEnd;
-            U32 seqPos = cur;
+            U32 stretchPos = cur;
 
             DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
                         last_pos, cur); (void)last_pos;
-            assert(storeEnd < ZSTD_OPT_NUM);
-            DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
-                        storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
-            opt[storeEnd] = lastSequence;
-            while (seqPos > 0) {
-                U32 const backDist = ZSTD_totalLen(opt[seqPos]);
+            assert(storeEnd < ZSTD_OPT_SIZE);
+            DEBUGLOG(6, "last stretch copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
+                        storeEnd, lastStretch.litlen, lastStretch.mlen, lastStretch.off);
+            if (lastStretch.litlen > 0) {
+                /* last "sequence" is unfinished: just a bunch of literals */
+                opt[storeEnd].litlen = lastStretch.litlen;
+                opt[storeEnd].mlen = 0;
+                storeStart = storeEnd-1;
+                opt[storeStart] = lastStretch;
+            } {
+                opt[storeEnd] = lastStretch;  /* note: litlen will be fixed */
+                storeStart = storeEnd;
+            }
+            while (1) {
+                ZSTD_optimal_t nextStretch = opt[stretchPos];
+                opt[storeStart].litlen = nextStretch.litlen;
+                DEBUGLOG(6, "selected sequence (llen=%u,mlen=%u,ofc=%u)",
+                            opt[storeStart].litlen, opt[storeStart].mlen, opt[storeStart].off);
+                if (nextStretch.mlen == 0) {
+                    /* reaching beginning of segment */
+                    break;
+                }
                 storeStart--;
-                DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
-                            seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
-                opt[storeStart] = opt[seqPos];
-                seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
+                opt[storeStart] = nextStretch; /* note: litlen will be fixed */
+                assert(nextStretch.litlen + nextStretch.mlen <= stretchPos);
+                stretchPos -= nextStretch.litlen + nextStretch.mlen;
             }
 
             /* save sequences */
-            DEBUGLOG(6, "sending selected sequences into seqStore")
+            DEBUGLOG(6, "sending selected sequences into seqStore");
             {   U32 storePos;
                 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
                     U32 const llen = opt[storePos].litlen;
@@ -1339,6 +1426,9 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
                     anchor += advance;
                     ip = anchor;
             }   }
+            DEBUGLOG(7, "new offset history : %u, %u, %u", rep[0], rep[1], rep[2]);
+
+            /* update all costs */
             ZSTD_setBasePrices(optStatePtr, optLevel);
         }
     }   /* while (ip < ilimit) */
@@ -1346,21 +1436,27 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
     /* Return the last literals size */
     return (size_t)(iend - anchor);
 }
+#endif /* build exclusions */
 
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
 static size_t ZSTD_compressBlock_opt0(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
 {
     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
 }
+#endif
 
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
 static size_t ZSTD_compressBlock_opt2(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
 {
     return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
 }
+#endif
 
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
 size_t ZSTD_compressBlock_btopt(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
@@ -1368,20 +1464,23 @@ size_t ZSTD_compressBlock_btopt(
     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
+#endif
 
 
 
 
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
 /* ZSTD_initStats_ultra():
  * make a first compression pass, just to seed stats with more accurate starting values.
  * only works on first block, with no dictionary and no ldm.
  * this function cannot error out, its narrow contract must be respected.
  */
-static void
-ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
-                     seqStore_t* seqStore,
-                     U32 rep[ZSTD_REP_NUM],
-               const void* src, size_t srcSize)
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+void ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
+                          seqStore_t* seqStore,
+                          U32 rep[ZSTD_REP_NUM],
+                    const void* src, size_t srcSize)
 {
     U32 tmpRep[ZSTD_REP_NUM];  /* updated rep codes will sink here */
     ZSTD_memcpy(tmpRep, rep, sizeof(tmpRep));
@@ -1425,7 +1524,7 @@ size_t ZSTD_compressBlock_btultra2(
      * Consequently, this can only work if no data has been previously loaded in tables,
      * aka, no dictionary, no prefix, no ldm preprocessing.
      * The compression ratio gain is generally small (~0.5% on first block),
-    ** the cost is 2x cpu time on first block. */
+     * the cost is 2x cpu time on first block. */
     assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
     if ( (ms->opt.litLengthSum==0)   /* first block */
       && (seqStore->sequences == seqStore->sequencesStart)  /* no ldm */
@@ -1438,7 +1537,9 @@ size_t ZSTD_compressBlock_btultra2(
 
     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
+#endif
 
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
 size_t ZSTD_compressBlock_btopt_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
@@ -1446,18 +1547,20 @@ size_t ZSTD_compressBlock_btopt_dictMatchState(
     return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
 }
 
-size_t ZSTD_compressBlock_btultra_dictMatchState(
+size_t ZSTD_compressBlock_btopt_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
 }
+#endif
 
-size_t ZSTD_compressBlock_btopt_extDict(
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btultra_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
 }
 
 size_t ZSTD_compressBlock_btultra_extDict(
@@ -1466,6 +1569,7 @@ size_t ZSTD_compressBlock_btultra_extDict(
 {
     return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
 }
+#endif
 
 /* note : no btultra2 variant for extDict nor dictMatchState,
  * because btultra2 is not meant to work with dictionaries
diff --git a/thirdparty/zstd/compress/zstd_opt.h b/thirdparty/zstd/compress/zstd_opt.h
index 342e5a3112..d4e7113157 100644
--- a/thirdparty/zstd/compress/zstd_opt.h
+++ b/thirdparty/zstd/compress/zstd_opt.h
@@ -17,30 +17,40 @@ extern "C" {
 
 #include "zstd_compress_internal.h"
 
+#if !defined(ZSTD_EXCLUDE_BTLAZY2_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR) \
+ || !defined(ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR)
 /* used in ZSTD_loadDictionaryContent() */
 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
+#endif
 
+#ifndef ZSTD_EXCLUDE_BTOPT_BLOCK_COMPRESSOR
 size_t ZSTD_compressBlock_btopt(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra(
+size_t ZSTD_compressBlock_btopt_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-size_t ZSTD_compressBlock_btultra2(
+size_t ZSTD_compressBlock_btopt_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 
+#define ZSTD_COMPRESSBLOCK_BTOPT ZSTD_compressBlock_btopt
+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE ZSTD_compressBlock_btopt_dictMatchState
+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT ZSTD_compressBlock_btopt_extDict
+#else
+#define ZSTD_COMPRESSBLOCK_BTOPT NULL
+#define ZSTD_COMPRESSBLOCK_BTOPT_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_BTOPT_EXTDICT NULL
+#endif
 
-size_t ZSTD_compressBlock_btopt_dictMatchState(
+#ifndef ZSTD_EXCLUDE_BTULTRA_BLOCK_COMPRESSOR
+size_t ZSTD_compressBlock_btultra(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_btultra_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-
-size_t ZSTD_compressBlock_btopt_extDict(
-        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_btultra_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
@@ -48,6 +58,20 @@ size_t ZSTD_compressBlock_btultra_extDict(
         /* note : no btultra2 variant for extDict nor dictMatchState,
          * because btultra2 is not meant to work with dictionaries
          * and is only specific for the first block (no prefix) */
+size_t ZSTD_compressBlock_btultra2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+
+#define ZSTD_COMPRESSBLOCK_BTULTRA ZSTD_compressBlock_btultra
+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE ZSTD_compressBlock_btultra_dictMatchState
+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT ZSTD_compressBlock_btultra_extDict
+#define ZSTD_COMPRESSBLOCK_BTULTRA2 ZSTD_compressBlock_btultra2
+#else
+#define ZSTD_COMPRESSBLOCK_BTULTRA NULL
+#define ZSTD_COMPRESSBLOCK_BTULTRA_DICTMATCHSTATE NULL
+#define ZSTD_COMPRESSBLOCK_BTULTRA_EXTDICT NULL
+#define ZSTD_COMPRESSBLOCK_BTULTRA2 NULL
+#endif
 
 #if defined (__cplusplus)
 }
diff --git a/thirdparty/zstd/compress/zstdmt_compress.c b/thirdparty/zstd/compress/zstdmt_compress.c
index 6786075569..86ccce3184 100644
--- a/thirdparty/zstd/compress/zstdmt_compress.c
+++ b/thirdparty/zstd/compress/zstdmt_compress.c
@@ -15,17 +15,13 @@
 #endif
 
 
-/* ======   Constants   ====== */
-#define ZSTDMT_OVERLAPLOG_DEFAULT 0
-
-
 /* ======   Dependencies   ====== */
-#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
+#include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
 #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memset, INT_MAX, UINT_MAX */
 #include "../common/mem.h"         /* MEM_STATIC */
 #include "../common/pool.h"        /* threadpool */
 #include "../common/threading.h"   /* mutex */
-#include "zstd_compress_internal.h"  /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
+#include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */
 #include "zstd_ldm.h"
 #include "zstdmt_compress.h"
 
@@ -44,12 +40,13 @@
 #  include <unistd.h>
 #  include <sys/times.h>
 
-#  define DEBUG_PRINTHEX(l,p,n) {            \
-    unsigned debug_u;                        \
-    for (debug_u=0; debug_u<(n); debug_u++)  \
-        RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
-    RAWLOG(l, " \n");                        \
-}
+#  define DEBUG_PRINTHEX(l,p,n)                                       \
+    do {                                                              \
+        unsigned debug_u;                                             \
+        for (debug_u=0; debug_u<(n); debug_u++)                       \
+            RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
+        RAWLOG(l, " \n");                                             \
+    } while (0)
 
 static unsigned long long GetCurrentClockTimeMicroseconds(void)
 {
@@ -61,25 +58,28 @@ static unsigned long long GetCurrentClockTimeMicroseconds(void)
 }  }
 
 #define MUTEX_WAIT_TIME_DLEVEL 6
-#define ZSTD_PTHREAD_MUTEX_LOCK(mutex) {          \
-    if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {   \
-        unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
-        ZSTD_pthread_mutex_lock(mutex);           \
-        {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
-            unsigned long long const elapsedTime = (afterTime-beforeTime); \
-            if (elapsedTime > 1000) {  /* or whatever threshold you like; I'm using 1 millisecond here */ \
-                DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL, "Thread took %llu microseconds to acquire mutex %s \n", \
-                   elapsedTime, #mutex);          \
-        }   }                                     \
-    } else {                                      \
-        ZSTD_pthread_mutex_lock(mutex);           \
-    }                                             \
-}
+#define ZSTD_PTHREAD_MUTEX_LOCK(mutex)                                                  \
+    do {                                                                                \
+        if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) {                                     \
+            unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds();    \
+            ZSTD_pthread_mutex_lock(mutex);                                             \
+            {   unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
+                unsigned long long const elapsedTime = (afterTime-beforeTime);          \
+                if (elapsedTime > 1000) {                                               \
+                    /* or whatever threshold you like; I'm using 1 millisecond here */  \
+                    DEBUGLOG(MUTEX_WAIT_TIME_DLEVEL,                                    \
+                        "Thread took %llu microseconds to acquire mutex %s \n",         \
+                        elapsedTime, #mutex);                                           \
+            }   }                                                                       \
+        } else {                                                                        \
+            ZSTD_pthread_mutex_lock(mutex);                                             \
+        }                                                                               \
+    } while (0)
 
 #else
 
 #  define ZSTD_PTHREAD_MUTEX_LOCK(m) ZSTD_pthread_mutex_lock(m)
-#  define DEBUG_PRINTHEX(l,p,n) {}
+#  define DEBUG_PRINTHEX(l,p,n) do { } while (0)
 
 #endif
 
@@ -100,18 +100,39 @@ typedef struct ZSTDMT_bufferPool_s {
     unsigned totalBuffers;
     unsigned nbBuffers;
     ZSTD_customMem cMem;
-    buffer_t bTable[1];   /* variable size */
+    buffer_t* buffers;
 } ZSTDMT_bufferPool;
 
+static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
+{
+    DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
+    if (!bufPool) return;   /* compatibility with free on NULL */
+    if (bufPool->buffers) {
+        unsigned u;
+        for (u=0; u<bufPool->totalBuffers; u++) {
+            DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->buffers[u].start);
+            ZSTD_customFree(bufPool->buffers[u].start, bufPool->cMem);
+        }
+        ZSTD_customFree(bufPool->buffers, bufPool->cMem);
+    }
+    ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
+    ZSTD_customFree(bufPool, bufPool->cMem);
+}
+
 static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_customMem cMem)
 {
-    ZSTDMT_bufferPool* const bufPool = (ZSTDMT_bufferPool*)ZSTD_customCalloc(
-        sizeof(ZSTDMT_bufferPool) + (maxNbBuffers-1) * sizeof(buffer_t), cMem);
+    ZSTDMT_bufferPool* const bufPool =
+        (ZSTDMT_bufferPool*)ZSTD_customCalloc(sizeof(ZSTDMT_bufferPool), cMem);
     if (bufPool==NULL) return NULL;
     if (ZSTD_pthread_mutex_init(&bufPool->poolMutex, NULL)) {
         ZSTD_customFree(bufPool, cMem);
         return NULL;
     }
+    bufPool->buffers = (buffer_t*)ZSTD_customCalloc(maxNbBuffers * sizeof(buffer_t), cMem);
+    if (bufPool->buffers==NULL) {
+        ZSTDMT_freeBufferPool(bufPool);
+        return NULL;
+    }
     bufPool->bufferSize = 64 KB;
     bufPool->totalBuffers = maxNbBuffers;
     bufPool->nbBuffers = 0;
@@ -119,32 +140,19 @@ static ZSTDMT_bufferPool* ZSTDMT_createBufferPool(unsigned maxNbBuffers, ZSTD_cu
     return bufPool;
 }
 
-static void ZSTDMT_freeBufferPool(ZSTDMT_bufferPool* bufPool)
-{
-    unsigned u;
-    DEBUGLOG(3, "ZSTDMT_freeBufferPool (address:%08X)", (U32)(size_t)bufPool);
-    if (!bufPool) return;   /* compatibility with free on NULL */
-    for (u=0; u<bufPool->totalBuffers; u++) {
-        DEBUGLOG(4, "free buffer %2u (address:%08X)", u, (U32)(size_t)bufPool->bTable[u].start);
-        ZSTD_customFree(bufPool->bTable[u].start, bufPool->cMem);
-    }
-    ZSTD_pthread_mutex_destroy(&bufPool->poolMutex);
-    ZSTD_customFree(bufPool, bufPool->cMem);
-}
-
 /* only works at initialization, not during compression */
 static size_t ZSTDMT_sizeof_bufferPool(ZSTDMT_bufferPool* bufPool)
 {
-    size_t const poolSize = sizeof(*bufPool)
-                          + (bufPool->totalBuffers - 1) * sizeof(buffer_t);
+    size_t const poolSize = sizeof(*bufPool);
+    size_t const arraySize = bufPool->totalBuffers * sizeof(buffer_t);
     unsigned u;
     size_t totalBufferSize = 0;
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     for (u=0; u<bufPool->totalBuffers; u++)
-        totalBufferSize += bufPool->bTable[u].capacity;
+        totalBufferSize += bufPool->buffers[u].capacity;
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
 
-    return poolSize + totalBufferSize;
+    return poolSize + arraySize + totalBufferSize;
 }
 
 /* ZSTDMT_setBufferSize() :
@@ -187,9 +195,9 @@ static buffer_t ZSTDMT_getBuffer(ZSTDMT_bufferPool* bufPool)
     DEBUGLOG(5, "ZSTDMT_getBuffer: bSize = %u", (U32)bufPool->bufferSize);
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     if (bufPool->nbBuffers) {   /* try to use an existing buffer */
-        buffer_t const buf = bufPool->bTable[--(bufPool->nbBuffers)];
+        buffer_t const buf = bufPool->buffers[--(bufPool->nbBuffers)];
         size_t const availBufferSize = buf.capacity;
-        bufPool->bTable[bufPool->nbBuffers] = g_nullBuffer;
+        bufPool->buffers[bufPool->nbBuffers] = g_nullBuffer;
         if ((availBufferSize >= bSize) & ((availBufferSize>>3) <= bSize)) {
             /* large enough, but not too much */
             DEBUGLOG(5, "ZSTDMT_getBuffer: provide buffer %u of size %u",
@@ -250,14 +258,14 @@ static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
     if (buf.start == NULL) return;   /* compatible with release on NULL */
     ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
     if (bufPool->nbBuffers < bufPool->totalBuffers) {
-        bufPool->bTable[bufPool->nbBuffers++] = buf;  /* stored for later use */
+        bufPool->buffers[bufPool->nbBuffers++] = buf;  /* stored for later use */
         DEBUGLOG(5, "ZSTDMT_releaseBuffer: stored buffer of size %u in slot %u",
                     (U32)buf.capacity, (U32)(bufPool->nbBuffers-1));
         ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
         return;
     }
     ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
-    /* Reached bufferPool capacity (should not happen) */
+    /* Reached bufferPool capacity (note: should not happen) */
     DEBUGLOG(5, "ZSTDMT_releaseBuffer: pool capacity reached => freeing ");
     ZSTD_customFree(buf.start, bufPool->cMem);
 }
@@ -350,16 +358,20 @@ typedef struct {
     int totalCCtx;
     int availCCtx;
     ZSTD_customMem cMem;
-    ZSTD_CCtx* cctx[1];   /* variable size */
+    ZSTD_CCtx** cctxs;
 } ZSTDMT_CCtxPool;
 
-/* note : all CCtx borrowed from the pool should be released back to the pool _before_ freeing the pool */
+/* note : all CCtx borrowed from the pool must be reverted back to the pool _before_ freeing the pool */
 static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 {
-    int cid;
-    for (cid=0; cid<pool->totalCCtx; cid++)
-        ZSTD_freeCCtx(pool->cctx[cid]);  /* note : compatible with free on NULL */
+    if (!pool) return;
     ZSTD_pthread_mutex_destroy(&pool->poolMutex);
+    if (pool->cctxs) {
+        int cid;
+        for (cid=0; cid<pool->totalCCtx; cid++)
+            ZSTD_freeCCtx(pool->cctxs[cid]);  /* free compatible with NULL */
+        ZSTD_customFree(pool->cctxs, pool->cMem);
+    }
     ZSTD_customFree(pool, pool->cMem);
 }
 
@@ -368,19 +380,24 @@ static void ZSTDMT_freeCCtxPool(ZSTDMT_CCtxPool* pool)
 static ZSTDMT_CCtxPool* ZSTDMT_createCCtxPool(int nbWorkers,
                                               ZSTD_customMem cMem)
 {
-    ZSTDMT_CCtxPool* const cctxPool = (ZSTDMT_CCtxPool*) ZSTD_customCalloc(
-        sizeof(ZSTDMT_CCtxPool) + (nbWorkers-1)*sizeof(ZSTD_CCtx*), cMem);
+    ZSTDMT_CCtxPool* const cctxPool =
+        (ZSTDMT_CCtxPool*) ZSTD_customCalloc(sizeof(ZSTDMT_CCtxPool), cMem);
     assert(nbWorkers > 0);
     if (!cctxPool) return NULL;
     if (ZSTD_pthread_mutex_init(&cctxPool->poolMutex, NULL)) {
         ZSTD_customFree(cctxPool, cMem);
         return NULL;
     }
-    cctxPool->cMem = cMem;
     cctxPool->totalCCtx = nbWorkers;
+    cctxPool->cctxs = (ZSTD_CCtx**)ZSTD_customCalloc(nbWorkers * sizeof(ZSTD_CCtx*), cMem);
+    if (!cctxPool->cctxs) {
+        ZSTDMT_freeCCtxPool(cctxPool);
+        return NULL;
+    }
+    cctxPool->cMem = cMem;
+    cctxPool->cctxs[0] = ZSTD_createCCtx_advanced(cMem);
+    if (!cctxPool->cctxs[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
     cctxPool->availCCtx = 1;   /* at least one cctx for single-thread mode */
-    cctxPool->cctx[0] = ZSTD_createCCtx_advanced(cMem);
-    if (!cctxPool->cctx[0]) { ZSTDMT_freeCCtxPool(cctxPool); return NULL; }
     DEBUGLOG(3, "cctxPool created, with %u workers", nbWorkers);
     return cctxPool;
 }
@@ -402,16 +419,16 @@ static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
 {
     ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
     {   unsigned const nbWorkers = cctxPool->totalCCtx;
-        size_t const poolSize = sizeof(*cctxPool)
-                                + (nbWorkers-1) * sizeof(ZSTD_CCtx*);
-        unsigned u;
+        size_t const poolSize = sizeof(*cctxPool);
+        size_t const arraySize = cctxPool->totalCCtx * sizeof(ZSTD_CCtx*);
         size_t totalCCtxSize = 0;
+        unsigned u;
         for (u=0; u<nbWorkers; u++) {
-            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctx[u]);
+            totalCCtxSize += ZSTD_sizeof_CCtx(cctxPool->cctxs[u]);
         }
         ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
         assert(nbWorkers > 0);
-        return poolSize + totalCCtxSize;
+        return poolSize + arraySize + totalCCtxSize;
     }
 }
 
@@ -421,7 +438,7 @@ static ZSTD_CCtx* ZSTDMT_getCCtx(ZSTDMT_CCtxPool* cctxPool)
     ZSTD_pthread_mutex_lock(&cctxPool->poolMutex);
     if (cctxPool->availCCtx) {
         cctxPool->availCCtx--;
-        {   ZSTD_CCtx* const cctx = cctxPool->cctx[cctxPool->availCCtx];
+        {   ZSTD_CCtx* const cctx = cctxPool->cctxs[cctxPool->availCCtx];
             ZSTD_pthread_mutex_unlock(&cctxPool->poolMutex);
             return cctx;
     }   }
@@ -435,7 +452,7 @@ static void ZSTDMT_releaseCCtx(ZSTDMT_CCtxPool* pool, ZSTD_CCtx* cctx)
     if (cctx==NULL) return;   /* compatibility with release on NULL */
     ZSTD_pthread_mutex_lock(&pool->poolMutex);
     if (pool->availCCtx < pool->totalCCtx)
-        pool->cctx[pool->availCCtx++] = cctx;
+        pool->cctxs[pool->availCCtx++] = cctx;
     else {
         /* pool overflow : should not happen, since totalCCtx==nbWorkers */
         DEBUGLOG(4, "CCtx pool overflow : free cctx");
@@ -601,11 +618,8 @@ static void ZSTDMT_serialState_update(serialState_t* serialState,
     ZSTD_pthread_mutex_unlock(&serialState->mutex);
 
     if (seqStore.size > 0) {
-        size_t const err = ZSTD_referenceExternalSequences(
-            jobCCtx, seqStore.seq, seqStore.size);
+        ZSTD_referenceExternalSequences(jobCCtx, seqStore.seq, seqStore.size);
         assert(serialState->params.ldmParams.enableLdm == ZSTD_ps_enable);
-        assert(!ZSTD_isError(err));
-        (void)err;
     }
 }
 
@@ -657,12 +671,13 @@ typedef struct {
     unsigned frameChecksumNeeded;        /* used only by mtctx */
 } ZSTDMT_jobDescription;
 
-#define JOB_ERROR(e) {                          \
-    ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);   \
-    job->cSize = e;                             \
-    ZSTD_pthread_mutex_unlock(&job->job_mutex); \
-    goto _endJob;                               \
-}
+#define JOB_ERROR(e)                                \
+    do {                                            \
+        ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);   \
+        job->cSize = e;                             \
+        ZSTD_pthread_mutex_unlock(&job->job_mutex); \
+        goto _endJob;                               \
+    } while (0)
 
 /* ZSTDMT_compressionJob() is a POOL_function type */
 static void ZSTDMT_compressionJob(void* jobDescription)
@@ -1091,7 +1106,7 @@ ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
     {   unsigned jobNb;
         unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
         DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
-                    mtctx->doneJobID, lastJobNb, mtctx->jobReady)
+                    mtctx->doneJobID, lastJobNb, mtctx->jobReady);
         for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
             unsigned const wJobID = jobNb & mtctx->jobIDMask;
             ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
diff --git a/thirdparty/zstd/decompress/huf_decompress.c b/thirdparty/zstd/decompress/huf_decompress.c
index 5b217ac586..f85dd0beea 100644
--- a/thirdparty/zstd/decompress/huf_decompress.c
+++ b/thirdparty/zstd/decompress/huf_decompress.c
@@ -34,6 +34,12 @@
 *  Macros
 ****************************************************************/
 
+#ifdef HUF_DISABLE_FAST_DECODE
+# define HUF_ENABLE_FAST_DECODE 0
+#else
+# define HUF_ENABLE_FAST_DECODE 1
+#endif
+
 /* These two optional macros force the use one way or another of the two
  * Huffman decompression implementations. You can't force in both directions
  * at the same time.
@@ -158,17 +164,18 @@ static size_t HUF_initFastDStream(BYTE const* ip) {
  * op [in/out] - The output pointers, must be updated to reflect what is written.
  * bits [in/out] - The bitstream containers, must be updated to reflect the current state.
  * dt [in] - The decoding table.
- * ilimit [in] - The input limit, stop when any input pointer is below ilimit.
+ * ilowest [in] - The beginning of the valid range of the input. Decoders may read
+ *                down to this pointer. It may be below iend[0].
  * oend [in] - The end of the output stream. op[3] must not cross oend.
  * iend [in] - The end of each input stream. ip[i] may cross iend[i],
- *             as long as it is above ilimit, but that indicates corruption.
+ *             as long as it is above ilowest, but that indicates corruption.
  */
 typedef struct {
     BYTE const* ip[4];
     BYTE* op[4];
     U64 bits[4];
     void const* dt;
-    BYTE const* ilimit;
+    BYTE const* ilowest;
     BYTE* oend;
     BYTE const* iend[4];
 } HUF_DecompressFastArgs;
@@ -186,9 +193,9 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
     void const* dt = DTable + 1;
     U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
 
-    const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
+    const BYTE* const istart = (const BYTE*)src;
 
-    BYTE* const oend = (BYTE*)dst + dstSize;
+    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
 
     /* The fast decoding loop assumes 64-bit little-endian.
      * This condition is false on x32.
@@ -196,6 +203,11 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
     if (!MEM_isLittleEndian() || MEM_32bits())
         return 0;
 
+    /* Avoid nullptr addition */
+    if (dstSize == 0)
+        return 0;
+    assert(dst != NULL);
+
     /* strict minimum : jump table + 1 byte per stream */
     if (srcSize < 10)
         return ERROR(corruption_detected);
@@ -209,7 +221,6 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
 
     /* Read the jump table. */
     {
-        const BYTE* const istart = (const BYTE*)src;
         size_t const length1 = MEM_readLE16(istart);
         size_t const length2 = MEM_readLE16(istart+2);
         size_t const length3 = MEM_readLE16(istart+4);
@@ -221,10 +232,8 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
 
         /* HUF_initFastDStream() requires this, and this small of an input
          * won't benefit from the ASM loop anyways.
-         * length1 must be >= 16 so that ip[0] >= ilimit before the loop
-         * starts.
          */
-        if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
+        if (length1 < 8 || length2 < 8 || length3 < 8 || length4 < 8)
             return 0;
         if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow */
     }
@@ -256,11 +265,12 @@ static size_t HUF_DecompressFastArgs_init(HUF_DecompressFastArgs* args, void* ds
     args->bits[2] = HUF_initFastDStream(args->ip[2]);
     args->bits[3] = HUF_initFastDStream(args->ip[3]);
 
-    /* If ip[] >= ilimit, it is guaranteed to be safe to
-        * reload bits[]. It may be beyond its section, but is
-        * guaranteed to be valid (>= istart).
-        */
-    args->ilimit = ilimit;
+    /* The decoders must be sure to never read beyond ilowest.
+     * This is lower than iend[0], but allowing decoders to read
+     * down to ilowest can allow an extra iteration or two in the
+     * fast loop.
+     */
+    args->ilowest = istart;
 
     args->oend = oend;
     args->dt = dt;
@@ -285,13 +295,31 @@ static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressFastArg
     assert(sizeof(size_t) == 8);
     bit->bitContainer = MEM_readLEST(args->ip[stream]);
     bit->bitsConsumed = ZSTD_countTrailingZeros64(args->bits[stream]);
-    bit->start = (const char*)args->iend[0];
+    bit->start = (const char*)args->ilowest;
     bit->limitPtr = bit->start + sizeof(size_t);
     bit->ptr = (const char*)args->ip[stream];
 
     return 0;
 }
 
+/* Calls X(N) for each stream 0, 1, 2, 3. */
+#define HUF_4X_FOR_EACH_STREAM(X) \
+    do {                          \
+        X(0);                     \
+        X(1);                     \
+        X(2);                     \
+        X(3);                     \
+    } while (0)
+
+/* Calls X(N, var) for each stream 0, 1, 2, 3. */
+#define HUF_4X_FOR_EACH_STREAM_WITH_VAR(X, var) \
+    do {                                        \
+        X(0, (var));                            \
+        X(1, (var));                            \
+        X(2, (var));                            \
+        X(3, (var));                            \
+    } while (0)
+
 
 #ifndef HUF_FORCE_DECOMPRESS_X2
 
@@ -500,15 +528,19 @@ HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog
 }
 
 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
-    *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
+    do { *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog); } while (0)
 
-#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)  \
-    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+#define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr)      \
+    do {                                            \
+        if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
+            HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
+    } while (0)
 
-#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
-    if (MEM_64bits()) \
-        HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
+#define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr)      \
+    do {                                            \
+        if (MEM_64bits())                           \
+            HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr); \
+    } while (0)
 
 HINT_INLINE size_t
 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
@@ -546,7 +578,7 @@ HUF_decompress1X1_usingDTable_internal_body(
     const HUF_DTable* DTable)
 {
     BYTE* op = (BYTE*)dst;
-    BYTE* const oend = op + dstSize;
+    BYTE* const oend = ZSTD_maybeNullPtrAdd(op, dstSize);
     const void* dtPtr = DTable + 1;
     const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
     BIT_DStream_t bitD;
@@ -574,6 +606,7 @@ HUF_decompress4X1_usingDTable_internal_body(
 {
     /* Check */
     if (cSrcSize < 10) return ERROR(corruption_detected);  /* strict minimum : jump table + 1 byte per stream */
+    if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
 
     {   const BYTE* const istart = (const BYTE*) cSrc;
         BYTE* const ostart = (BYTE*) dst;
@@ -609,7 +642,7 @@ HUF_decompress4X1_usingDTable_internal_body(
 
         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
         if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
-        if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
+        assert(dstSize >= 6); /* validated above */
         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
@@ -692,7 +725,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
     BYTE* op[4];
     U16 const* const dtable = (U16 const*)args->dt;
     BYTE* const oend = args->oend;
-    BYTE const* const ilimit = args->ilimit;
+    BYTE const* const ilowest = args->ilowest;
 
     /* Copy the arguments to local variables */
     ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
@@ -705,13 +738,12 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
     for (;;) {
         BYTE* olimit;
         int stream;
-        int symbol;
 
         /* Assert loop preconditions */
 #ifndef NDEBUG
         for (stream = 0; stream < 4; ++stream) {
             assert(op[stream] <= (stream == 3 ? oend : op[stream + 1]));
-            assert(ip[stream] >= ilimit);
+            assert(ip[stream] >= ilowest);
         }
 #endif
         /* Compute olimit */
@@ -721,7 +753,7 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
             /* Each iteration consumes up to 11 bits * 5 = 55 bits < 7 bytes
              * per stream.
              */
-            size_t const iiters = (size_t)(ip[0] - ilimit) / 7;
+            size_t const iiters = (size_t)(ip[0] - ilowest) / 7;
             /* We can safely run iters iterations before running bounds checks */
             size_t const iters = MIN(oiters, iiters);
             size_t const symbols = iters * 5;
@@ -732,8 +764,8 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
              */
             olimit = op[3] + symbols;
 
-            /* Exit fast decoding loop once we get close to the end. */
-            if (op[3] + 20 > olimit)
+            /* Exit fast decoding loop once we reach the end. */
+            if (op[3] == olimit)
                 break;
 
             /* Exit the decoding loop if any input pointer has crossed the
@@ -752,27 +784,42 @@ void HUF_decompress4X1_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
         }
 #endif
 
+#define HUF_4X1_DECODE_SYMBOL(_stream, _symbol)                 \
+    do {                                                        \
+        int const index = (int)(bits[(_stream)] >> 53);         \
+        int const entry = (int)dtable[index];                   \
+        bits[(_stream)] <<= (entry & 0x3F);                     \
+        op[(_stream)][(_symbol)] = (BYTE)((entry >> 8) & 0xFF); \
+    } while (0)
+
+#define HUF_4X1_RELOAD_STREAM(_stream)                              \
+    do {                                                            \
+        int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
+        int const nbBits = ctz & 7;                                 \
+        int const nbBytes = ctz >> 3;                               \
+        op[(_stream)] += 5;                                         \
+        ip[(_stream)] -= nbBytes;                                   \
+        bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
+        bits[(_stream)] <<= nbBits;                                 \
+    } while (0)
+
+        /* Manually unroll the loop because compilers don't consistently
+         * unroll the inner loops, which destroys performance.
+         */
         do {
             /* Decode 5 symbols in each of the 4 streams */
-            for (symbol = 0; symbol < 5; ++symbol) {
-                for (stream = 0; stream < 4; ++stream) {
-                    int const index = (int)(bits[stream] >> 53);
-                    int const entry = (int)dtable[index];
-                    bits[stream] <<= (entry & 63);
-                    op[stream][symbol] = (BYTE)((entry >> 8) & 0xFF);
-                }
-            }
-            /* Reload the bitstreams */
-            for (stream = 0; stream < 4; ++stream) {
-                int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
-                int const nbBits = ctz & 7;
-                int const nbBytes = ctz >> 3;
-                op[stream] += 5;
-                ip[stream] -= nbBytes;
-                bits[stream] = MEM_read64(ip[stream]) | 1;
-                bits[stream] <<= nbBits;
-            }
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 0);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 1);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 2);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 3);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X1_DECODE_SYMBOL, 4);
+
+            /* Reload each of the 4 the bitstreams */
+            HUF_4X_FOR_EACH_STREAM(HUF_4X1_RELOAD_STREAM);
         } while (op[3] < olimit);
+
+#undef HUF_4X1_DECODE_SYMBOL
+#undef HUF_4X1_RELOAD_STREAM
     }
 
 _out:
@@ -797,8 +844,8 @@ HUF_decompress4X1_usingDTable_internal_fast(
     HUF_DecompressFastLoopFn loopFn)
 {
     void const* dt = DTable + 1;
-    const BYTE* const iend = (const BYTE*)cSrc + 6;
-    BYTE* const oend = (BYTE*)dst + dstSize;
+    BYTE const* const ilowest = (BYTE const*)cSrc;
+    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
     HUF_DecompressFastArgs args;
     {   size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
         FORWARD_IF_ERROR(ret, "Failed to init fast loop args");
@@ -806,18 +853,22 @@ HUF_decompress4X1_usingDTable_internal_fast(
             return 0;
     }
 
-    assert(args.ip[0] >= args.ilimit);
+    assert(args.ip[0] >= args.ilowest);
     loopFn(&args);
 
-    /* Our loop guarantees that ip[] >= ilimit and that we haven't
+    /* Our loop guarantees that ip[] >= ilowest and that we haven't
     * overwritten any op[].
     */
-    assert(args.ip[0] >= iend);
-    assert(args.ip[1] >= iend);
-    assert(args.ip[2] >= iend);
-    assert(args.ip[3] >= iend);
+    assert(args.ip[0] >= ilowest);
+    assert(args.ip[0] >= ilowest);
+    assert(args.ip[1] >= ilowest);
+    assert(args.ip[2] >= ilowest);
+    assert(args.ip[3] >= ilowest);
     assert(args.op[3] <= oend);
-    (void)iend;
+
+    assert(ilowest == args.ilowest);
+    assert(ilowest + 6 == args.iend[0]);
+    (void)ilowest;
 
     /* finish bit streams one by one. */
     {   size_t const segmentSize = (dstSize+3) / 4;
@@ -868,7 +919,7 @@ static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize,
     }
 #endif
 
-    if (!(flags & HUF_flags_disableFast)) {
+    if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
         size_t const ret = HUF_decompress4X1_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
         if (ret != 0)
             return ret;
@@ -1239,15 +1290,19 @@ HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, c
 }
 
 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
-    ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+    do { ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); } while (0)
 
-#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
-    if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
-        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+#define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr)                     \
+    do {                                                           \
+        if (MEM_64bits() || (HUF_TABLELOG_MAX<=12))                \
+            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
+    } while (0)
 
-#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
-    if (MEM_64bits()) \
-        ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
+#define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr)                     \
+    do {                                                           \
+        if (MEM_64bits())                                          \
+            ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog); \
+    } while (0)
 
 HINT_INLINE size_t
 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
@@ -1307,7 +1362,7 @@ HUF_decompress1X2_usingDTable_internal_body(
 
     /* decode */
     {   BYTE* const ostart = (BYTE*) dst;
-        BYTE* const oend = ostart + dstSize;
+        BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, dstSize);
         const void* const dtPtr = DTable+1;   /* force compiler to not use strict-aliasing */
         const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
         DTableDesc const dtd = HUF_getDTableDesc(DTable);
@@ -1332,6 +1387,7 @@ HUF_decompress4X2_usingDTable_internal_body(
     const HUF_DTable* DTable)
 {
     if (cSrcSize < 10) return ERROR(corruption_detected);   /* strict minimum : jump table + 1 byte per stream */
+    if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
 
     {   const BYTE* const istart = (const BYTE*) cSrc;
         BYTE* const ostart = (BYTE*) dst;
@@ -1367,7 +1423,7 @@ HUF_decompress4X2_usingDTable_internal_body(
 
         if (length4 > cSrcSize) return ERROR(corruption_detected);  /* overflow */
         if (opStart4 > oend) return ERROR(corruption_detected);     /* overflow */
-        if (dstSize < 6) return ERROR(corruption_detected);         /* stream 4-split doesn't work */
+        assert(dstSize >= 6 /* validated above */);
         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
@@ -1472,7 +1528,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
     BYTE* op[4];
     BYTE* oend[4];
     HUF_DEltX2 const* const dtable = (HUF_DEltX2 const*)args->dt;
-    BYTE const* const ilimit = args->ilimit;
+    BYTE const* const ilowest = args->ilowest;
 
     /* Copy the arguments to local registers. */
     ZSTD_memcpy(&bits, &args->bits, sizeof(bits));
@@ -1490,13 +1546,12 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
     for (;;) {
         BYTE* olimit;
         int stream;
-        int symbol;
 
         /* Assert loop preconditions */
 #ifndef NDEBUG
         for (stream = 0; stream < 4; ++stream) {
             assert(op[stream] <= oend[stream]);
-            assert(ip[stream] >= ilimit);
+            assert(ip[stream] >= ilowest);
         }
 #endif
         /* Compute olimit */
@@ -1509,7 +1564,7 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
              * We also know that each input pointer is >= ip[0]. So we can run
              * iters loops before running out of input.
              */
-            size_t iters = (size_t)(ip[0] - ilimit) / 7;
+            size_t iters = (size_t)(ip[0] - ilowest) / 7;
             /* Each iteration can produce up to 10 bytes of output per stream.
              * Each output stream my advance at different rates. So take the
              * minimum number of safe iterations among all the output streams.
@@ -1527,8 +1582,8 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
              */
             olimit = op[3] + (iters * 5);
 
-            /* Exit the fast decoding loop if we are too close to the end. */
-            if (op[3] + 10 > olimit)
+            /* Exit the fast decoding loop once we reach the end. */
+            if (op[3] == olimit)
                 break;
 
             /* Exit the decoding loop if any input pointer has crossed the
@@ -1547,54 +1602,58 @@ void HUF_decompress4X2_usingDTable_internal_fast_c_loop(HUF_DecompressFastArgs*
         }
 #endif
 
+#define HUF_4X2_DECODE_SYMBOL(_stream, _decode3)                      \
+    do {                                                              \
+        if ((_decode3) || (_stream) != 3) {                           \
+            int const index = (int)(bits[(_stream)] >> 53);           \
+            HUF_DEltX2 const entry = dtable[index];                   \
+            MEM_write16(op[(_stream)], entry.sequence); \
+            bits[(_stream)] <<= (entry.nbBits) & 0x3F;                \
+            op[(_stream)] += (entry.length);                          \
+        }                                                             \
+    } while (0)
+
+#define HUF_4X2_RELOAD_STREAM(_stream)                                  \
+    do {                                                                \
+        HUF_4X2_DECODE_SYMBOL(3, 1);                                    \
+        {                                                               \
+            int const ctz = ZSTD_countTrailingZeros64(bits[(_stream)]); \
+            int const nbBits = ctz & 7;                                 \
+            int const nbBytes = ctz >> 3;                               \
+            ip[(_stream)] -= nbBytes;                                   \
+            bits[(_stream)] = MEM_read64(ip[(_stream)]) | 1;            \
+            bits[(_stream)] <<= nbBits;                                 \
+        }                                                               \
+    } while (0)
+
+        /* Manually unroll the loop because compilers don't consistently
+         * unroll the inner loops, which destroys performance.
+         */
         do {
-            /* Do 5 table lookups for each of the first 3 streams */
-            for (symbol = 0; symbol < 5; ++symbol) {
-                for (stream = 0; stream < 3; ++stream) {
-                    int const index = (int)(bits[stream] >> 53);
-                    HUF_DEltX2 const entry = dtable[index];
-                    MEM_write16(op[stream], entry.sequence);
-                    bits[stream] <<= (entry.nbBits);
-                    op[stream] += (entry.length);
-                }
-            }
-            /* Do 1 table lookup from the final stream */
-            {
-                int const index = (int)(bits[3] >> 53);
-                HUF_DEltX2 const entry = dtable[index];
-                MEM_write16(op[3], entry.sequence);
-                bits[3] <<= (entry.nbBits);
-                op[3] += (entry.length);
-            }
-            /* Do 4 table lookups from the final stream & reload bitstreams */
-            for (stream = 0; stream < 4; ++stream) {
-                /* Do a table lookup from the final stream.
-                 * This is interleaved with the reloading to reduce register
-                 * pressure. This shouldn't be necessary, but compilers can
-                 * struggle with codegen with high register pressure.
-                 */
-                {
-                    int const index = (int)(bits[3] >> 53);
-                    HUF_DEltX2 const entry = dtable[index];
-                    MEM_write16(op[3], entry.sequence);
-                    bits[3] <<= (entry.nbBits);
-                    op[3] += (entry.length);
-                }
-                /* Reload the bistreams. The final bitstream must be reloaded
-                 * after the 5th symbol was decoded.
-                 */
-                {
-                    int const ctz = ZSTD_countTrailingZeros64(bits[stream]);
-                    int const nbBits = ctz & 7;
-                    int const nbBytes = ctz >> 3;
-                    ip[stream] -= nbBytes;
-                    bits[stream] = MEM_read64(ip[stream]) | 1;
-                    bits[stream] <<= nbBits;
-                }
-            }
+            /* Decode 5 symbols from each of the first 3 streams.
+             * The final stream will be decoded during the reload phase
+             * to reduce register pressure.
+             */
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+            HUF_4X_FOR_EACH_STREAM_WITH_VAR(HUF_4X2_DECODE_SYMBOL, 0);
+
+            /* Decode one symbol from the final stream */
+            HUF_4X2_DECODE_SYMBOL(3, 1);
+
+            /* Decode 4 symbols from the final stream & reload bitstreams.
+             * The final stream is reloaded last, meaning that all 5 symbols
+             * are decoded from the final stream before it is reloaded.
+             */
+            HUF_4X_FOR_EACH_STREAM(HUF_4X2_RELOAD_STREAM);
         } while (op[3] < olimit);
     }
 
+#undef HUF_4X2_DECODE_SYMBOL
+#undef HUF_4X2_RELOAD_STREAM
+
 _out:
 
     /* Save the final values of each of the state variables back to args. */
@@ -1611,8 +1670,8 @@ HUF_decompress4X2_usingDTable_internal_fast(
     const HUF_DTable* DTable,
     HUF_DecompressFastLoopFn loopFn) {
     void const* dt = DTable + 1;
-    const BYTE* const iend = (const BYTE*)cSrc + 6;
-    BYTE* const oend = (BYTE*)dst + dstSize;
+    const BYTE* const ilowest = (const BYTE*)cSrc;
+    BYTE* const oend = ZSTD_maybeNullPtrAdd((BYTE*)dst, dstSize);
     HUF_DecompressFastArgs args;
     {
         size_t const ret = HUF_DecompressFastArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
@@ -1621,16 +1680,19 @@ HUF_decompress4X2_usingDTable_internal_fast(
             return 0;
     }
 
-    assert(args.ip[0] >= args.ilimit);
+    assert(args.ip[0] >= args.ilowest);
     loopFn(&args);
 
     /* note : op4 already verified within main loop */
-    assert(args.ip[0] >= iend);
-    assert(args.ip[1] >= iend);
-    assert(args.ip[2] >= iend);
-    assert(args.ip[3] >= iend);
+    assert(args.ip[0] >= ilowest);
+    assert(args.ip[1] >= ilowest);
+    assert(args.ip[2] >= ilowest);
+    assert(args.ip[3] >= ilowest);
     assert(args.op[3] <= oend);
-    (void)iend;
+
+    assert(ilowest == args.ilowest);
+    assert(ilowest + 6 == args.iend[0]);
+    (void)ilowest;
 
     /* finish bitStreams one by one */
     {
@@ -1679,7 +1741,7 @@ static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize,
     }
 #endif
 
-    if (!(flags & HUF_flags_disableFast)) {
+    if (HUF_ENABLE_FAST_DECODE && !(flags & HUF_flags_disableFast)) {
         size_t const ret = HUF_decompress4X2_usingDTable_internal_fast(dst, dstSize, cSrc, cSrcSize, DTable, loopFn);
         if (ret != 0)
             return ret;
diff --git a/thirdparty/zstd/decompress/huf_decompress_amd64.S b/thirdparty/zstd/decompress/huf_decompress_amd64.S
index 671624fe34..78da291ee3 100644
--- a/thirdparty/zstd/decompress/huf_decompress_amd64.S
+++ b/thirdparty/zstd/decompress/huf_decompress_amd64.S
@@ -10,11 +10,32 @@
 
 #include "../common/portability_macros.h"
 
+#if defined(__ELF__) && defined(__GNUC__)
 /* Stack marking
  * ref: https://wiki.gentoo.org/wiki/Hardened/GNU_stack_quickstart
  */
-#if defined(__ELF__) && defined(__GNUC__)
 .section .note.GNU-stack,"",%progbits
+
+#if defined(__aarch64__)
+/* Mark that this assembly supports BTI & PAC, because it is empty for aarch64.
+ * See: https://github.com/facebook/zstd/issues/3841
+ * See: https://gcc.godbolt.org/z/sqr5T4ffK
+ * See: https://lore.kernel.org/linux-arm-kernel/20200429211641.9279-8-broonie@kernel.org/
+ * See: https://reviews.llvm.org/D62609
+ */
+.pushsection .note.gnu.property, "a"
+.p2align 3
+.long 4                 /* size of the name - "GNU\0" */
+.long 0x10              /* size of descriptor */
+.long 0x5               /* NT_GNU_PROPERTY_TYPE_0 */
+.asciz "GNU"
+.long 0xc0000000        /* pr_type - GNU_PROPERTY_AARCH64_FEATURE_1_AND */
+.long 4                 /* pr_datasz - 4 bytes */
+.long 3                 /* pr_data - GNU_PROPERTY_AARCH64_FEATURE_1_BTI | GNU_PROPERTY_AARCH64_FEATURE_1_PAC */
+.p2align 3              /* pr_padding - bring everything to 8 byte alignment */
+.popsection
+#endif
+
 #endif
 
 #if ZSTD_ENABLE_ASM_X86_64_BMI2
@@ -131,7 +152,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
     movq 88(%rax), %bits3
     movq 96(%rax), %dtable
     push %rax      /* argument */
-    push 104(%rax) /* ilimit */
+    push 104(%rax) /* ilowest */
     push 112(%rax) /* oend */
     push %olimit   /* olimit space */
 
@@ -156,11 +177,11 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
     shrq $2, %r15
 
     movq %ip0,     %rax /* rax = ip0 */
-    movq 40(%rsp), %rdx /* rdx = ilimit */
-    subq %rdx,     %rax /* rax = ip0 - ilimit */
-    movq %rax,     %rbx /* rbx = ip0 - ilimit */
+    movq 40(%rsp), %rdx /* rdx = ilowest */
+    subq %rdx,     %rax /* rax = ip0 - ilowest */
+    movq %rax,     %rbx /* rbx = ip0 - ilowest */
 
-    /* rdx = (ip0 - ilimit) / 7 */
+    /* rdx = (ip0 - ilowest) / 7 */
     movabsq $2635249153387078803, %rdx
     mulq %rdx
     subq %rdx, %rbx
@@ -183,9 +204,8 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
 
     /* If (op3 + 20 > olimit) */
     movq %op3, %rax    /* rax = op3 */
-    addq $20,  %rax    /* rax = op3 + 20 */
-    cmpq %rax, %olimit /* op3 + 20 > olimit */
-    jb .L_4X1_exit
+    cmpq %rax, %olimit /* op3 == olimit */
+    je .L_4X1_exit
 
     /* If (ip1 < ip0) go to exit */
     cmpq %ip0, %ip1
@@ -316,7 +336,7 @@ HUF_decompress4X1_usingDTable_internal_fast_asm_loop:
     /* Restore stack (oend & olimit) */
     pop %rax /* olimit */
     pop %rax /* oend */
-    pop %rax /* ilimit */
+    pop %rax /* ilowest */
     pop %rax /* arg */
 
     /* Save ip / op / bits */
@@ -387,7 +407,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
     movq 96(%rax), %dtable
     push %rax      /* argument */
     push %rax      /* olimit */
-    push 104(%rax) /* ilimit */
+    push 104(%rax) /* ilowest */
 
     movq 112(%rax), %rax
     push %rax /* oend3 */
@@ -414,9 +434,9 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
 
     /* We can consume up to 7 input bytes each iteration. */
     movq %ip0,     %rax  /* rax = ip0 */
-    movq 40(%rsp), %rdx  /* rdx = ilimit */
-    subq %rdx,     %rax  /* rax = ip0 - ilimit */
-    movq %rax,    %r15   /* r15 = ip0 - ilimit */
+    movq 40(%rsp), %rdx  /* rdx = ilowest */
+    subq %rdx,     %rax  /* rax = ip0 - ilowest */
+    movq %rax,    %r15   /* r15 = ip0 - ilowest */
 
     /* rdx = rax / 7 */
     movabsq $2635249153387078803, %rdx
@@ -426,7 +446,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
     addq %r15, %rdx
     shrq $2, %rdx
 
-    /* r15 = (ip0 - ilimit) / 7 */
+    /* r15 = (ip0 - ilowest) / 7 */
     movq %rdx, %r15
 
     /* r15 = min(r15, min(oend0 - op0, oend1 - op1, oend2 - op2, oend3 - op3) / 10) */
@@ -467,9 +487,8 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
 
     /* If (op3 + 10 > olimit) */
     movq %op3, %rax    /* rax = op3 */
-    addq $10,  %rax    /* rax = op3 + 10 */
-    cmpq %rax, %olimit /* op3 + 10 > olimit */
-    jb .L_4X2_exit
+    cmpq %rax, %olimit /* op3 == olimit */
+    je .L_4X2_exit
 
     /* If (ip1 < ip0) go to exit */
     cmpq %ip0, %ip1
@@ -537,7 +556,7 @@ HUF_decompress4X2_usingDTable_internal_fast_asm_loop:
     pop %rax /* oend1 */
     pop %rax /* oend2 */
     pop %rax /* oend3 */
-    pop %rax /* ilimit */
+    pop %rax /* ilowest */
     pop %rax /* olimit */
     pop %rax /* arg */
 
diff --git a/thirdparty/zstd/decompress/zstd_decompress.c b/thirdparty/zstd/decompress/zstd_decompress.c
index 7bc2713429..2f03cf7b0c 100644
--- a/thirdparty/zstd/decompress/zstd_decompress.c
+++ b/thirdparty/zstd/decompress/zstd_decompress.c
@@ -55,18 +55,19 @@
 /*-*******************************************************
 *  Dependencies
 *********************************************************/
-#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
 #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
+#include "../common/allocations.h"  /* ZSTD_customMalloc, ZSTD_customCalloc, ZSTD_customFree */
+#include "../common/error_private.h"
+#include "../common/zstd_internal.h"  /* blockProperties_t */
 #include "../common/mem.h"         /* low level memory routines */
+#include "../common/bits.h"  /* ZSTD_highbit32 */
 #define FSE_STATIC_LINKING_ONLY
 #include "../common/fse.h"
 #include "../common/huf.h"
 #include "../common/xxhash.h" /* XXH64_reset, XXH64_update, XXH64_digest, XXH64 */
-#include "../common/zstd_internal.h"  /* blockProperties_t */
 #include "zstd_decompress_internal.h"   /* ZSTD_DCtx */
 #include "zstd_ddict.h"  /* ZSTD_DDictDictContent */
 #include "zstd_decompress_block.h"   /* ZSTD_decompressBlock_internal */
-#include "../common/bits.h"  /* ZSTD_highbit32 */
 
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
 #  include "../legacy/zstd_legacy.h"
@@ -245,6 +246,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
     dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
     dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
     dctx->disableHufAsm = 0;
+    dctx->maxBlockSizeParam = 0;
 }
 
 static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
@@ -265,6 +267,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
 #endif
     dctx->noForwardProgress = 0;
     dctx->oversizedDuration = 0;
+    dctx->isFrameDecompression = 1;
 #if DYNAMIC_BMI2
     dctx->bmi2 = ZSTD_cpuSupportsBmi2();
 #endif
@@ -726,17 +729,17 @@ static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret)
     return frameSizeInfo;
 }
 
-static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize)
+static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize, ZSTD_format_e format)
 {
     ZSTD_frameSizeInfo frameSizeInfo;
     ZSTD_memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo));
 
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
-    if (ZSTD_isLegacy(src, srcSize))
+    if (format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize))
         return ZSTD_findFrameSizeInfoLegacy(src, srcSize);
 #endif
 
-    if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
+    if (format == ZSTD_f_zstd1 && (srcSize >= ZSTD_SKIPPABLEHEADERSIZE)
         && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
         frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize);
         assert(ZSTD_isError(frameSizeInfo.compressedSize) ||
@@ -750,7 +753,7 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
         ZSTD_frameHeader zfh;
 
         /* Extract Frame Header */
-        {   size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize);
+        {   size_t const ret = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize, format);
             if (ZSTD_isError(ret))
                 return ZSTD_errorFrameSizeInfo(ret);
             if (ret > 0)
@@ -793,15 +796,17 @@ static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize
     }
 }
 
+static size_t ZSTD_findFrameCompressedSize_advanced(const void *src, size_t srcSize, ZSTD_format_e format) {
+    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, format);
+    return frameSizeInfo.compressedSize;
+}
+
 /** ZSTD_findFrameCompressedSize() :
- *  compatible with legacy mode
- *  `src` must point to the start of a ZSTD frame, ZSTD legacy frame, or skippable frame
- *  `srcSize` must be at least as large as the frame contained
- *  @return : the compressed size of the frame starting at `src` */
+ * See docs in zstd.h
+ * Note: compatible with legacy mode */
 size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize)
 {
-    ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
-    return frameSizeInfo.compressedSize;
+    return ZSTD_findFrameCompressedSize_advanced(src, srcSize, ZSTD_f_zstd1);
 }
 
 /** ZSTD_decompressBound() :
@@ -815,7 +820,7 @@ unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize)
     unsigned long long bound = 0;
     /* Iterate over each frame */
     while (srcSize > 0) {
-        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
         size_t const compressedSize = frameSizeInfo.compressedSize;
         unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
         if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR)
@@ -835,7 +840,7 @@ size_t ZSTD_decompressionMargin(void const* src, size_t srcSize)
 
     /* Iterate over each frame */
     while (srcSize > 0) {
-        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize);
+        ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize, ZSTD_f_zstd1);
         size_t const compressedSize = frameSizeInfo.compressedSize;
         unsigned long long const decompressedBound = frameSizeInfo.decompressedBound;
         ZSTD_frameHeader zfh;
@@ -971,6 +976,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
     }
 
+    /* Shrink the blockSizeMax if enabled */
+    if (dctx->maxBlockSizeParam != 0)
+        dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam);
+
     /* Loop on each block */
     while (1) {
         BYTE* oBlockEnd = oend;
@@ -1003,7 +1012,8 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         switch(blockProperties.blockType)
         {
         case bt_compressed:
-            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming);
+            assert(dctx->isFrameDecompression == 1);
+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming);
             break;
         case bt_raw :
             /* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
@@ -1016,12 +1026,14 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         default:
             RETURN_ERROR(corruption_detected, "invalid block type");
         }
-
-        if (ZSTD_isError(decodedSize)) return decodedSize;
-        if (dctx->validateChecksum)
+        FORWARD_IF_ERROR(decodedSize, "Block decompression failure");
+        DEBUGLOG(5, "Decompressed block of dSize = %u", (unsigned)decodedSize);
+        if (dctx->validateChecksum) {
             XXH64_update(&dctx->xxhState, op, decodedSize);
-        if (decodedSize != 0)
+        }
+        if (decodedSize) /* support dst = NULL,0 */ {
             op += decodedSize;
+        }
         assert(ip != NULL);
         ip += cBlockSize;
         remainingSrcSize -= cBlockSize;
@@ -1051,7 +1063,9 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
     return (size_t)(op-ostart);
 }
 
-static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
+static
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
+size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
                                         void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize,
                                   const void* dict, size_t dictSize,
@@ -1071,7 +1085,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
     while (srcSize >= ZSTD_startingInputLength(dctx->format)) {
 
 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
-        if (ZSTD_isLegacy(src, srcSize)) {
+        if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) {
             size_t decodedSize;
             size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
             if (ZSTD_isError(frameSize)) return frameSize;
@@ -1081,6 +1095,15 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
             decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize);
             if (ZSTD_isError(decodedSize)) return decodedSize;
 
+            {
+                unsigned long long const expectedSize = ZSTD_getFrameContentSize(src, srcSize);
+                RETURN_ERROR_IF(expectedSize == ZSTD_CONTENTSIZE_ERROR, corruption_detected, "Corrupted frame header!");
+                if (expectedSize != ZSTD_CONTENTSIZE_UNKNOWN) {
+                    RETURN_ERROR_IF(expectedSize != decodedSize, corruption_detected,
+                        "Frame header size does not match decoded size!");
+                }
+            }
+
             assert(decodedSize <= dstCapacity);
             dst = (BYTE*)dst + decodedSize;
             dstCapacity -= decodedSize;
@@ -1092,7 +1115,7 @@ static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
         }
 #endif
 
-        if (srcSize >= 4) {
+        if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
             U32 const magicNumber = MEM_readLE32(src);
             DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
             if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
@@ -1319,7 +1342,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
             {
             case bt_compressed:
                 DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
-                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
+                assert(dctx->isFrameDecompression == 1);
+                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming);
                 dctx->expected = 0;  /* Streaming not supported */
                 break;
             case bt_raw :
@@ -1388,6 +1412,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
     case ZSTDds_decodeSkippableHeader:
         assert(src != NULL);
         assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
+        assert(dctx->format != ZSTD_f_zstd1_magicless);
         ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize);   /* complete skippable header */
         dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE);   /* note : dctx->expected can grow seriously large, beyond local buffer size */
         dctx->stage = ZSTDds_skipFrame;
@@ -1548,6 +1573,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
     dctx->litEntropy = dctx->fseEntropy = 0;
     dctx->dictID = 0;
     dctx->bType = bt_reserved;
+    dctx->isFrameDecompression = 1;
     ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
     ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue));  /* initial repcodes */
     dctx->LLTptr = dctx->entropy.LLTable;
@@ -1819,6 +1845,10 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
             bounds.lowerBound = 0;
             bounds.upperBound = 1;
             return bounds;
+        case ZSTD_d_maxBlockSize:
+            bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
+            bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
+            return bounds;
 
         default:;
     }
@@ -1863,6 +1893,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
         case ZSTD_d_disableHuffmanAssembly:
             *value = (int)dctx->disableHufAsm;
             return 0;
+        case ZSTD_d_maxBlockSize:
+            *value = dctx->maxBlockSizeParam;
+            return 0;
         default:;
     }
     RETURN_ERROR(parameter_unsupported, "");
@@ -1900,6 +1933,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
             CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
             dctx->disableHufAsm = value != 0;
             return 0;
+        case ZSTD_d_maxBlockSize:
+            if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value);
+            dctx->maxBlockSizeParam = value;
+            return 0;
         default:;
     }
     RETURN_ERROR(parameter_unsupported, "");
@@ -1911,6 +1948,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
       || (reset == ZSTD_reset_session_and_parameters) ) {
         dctx->streamStage = zdss_init;
         dctx->noForwardProgress = 0;
+        dctx->isFrameDecompression = 1;
     }
     if ( (reset == ZSTD_reset_parameters)
       || (reset == ZSTD_reset_session_and_parameters) ) {
@@ -1927,11 +1965,17 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
     return ZSTD_sizeof_DCtx(dctx);
 }
 
-size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
+static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax)
 {
-    size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
-    /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
-    unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
+    size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax);
+    /* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block
+     * ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing
+     * the block at the beginning of the output buffer, and maintain a full window.
+     *
+     * We need another blockSize worth of buffer so that we can store split
+     * literals at the end of the block without overwriting the extDict window.
+     */
+    unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2);
     unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
     size_t const minRBSize = (size_t) neededSize;
     RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
@@ -1939,6 +1983,11 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
     return minRBSize;
 }
 
+size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
+{
+    return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX);
+}
+
 size_t ZSTD_estimateDStreamSize(size_t windowSize)
 {
     size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
@@ -2134,12 +2183,12 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
             if (zds->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN
                 && zds->fParams.frameType != ZSTD_skippableFrame
                 && (U64)(size_t)(oend-op) >= zds->fParams.frameContentSize) {
-                size_t const cSize = ZSTD_findFrameCompressedSize(istart, (size_t)(iend-istart));
+                size_t const cSize = ZSTD_findFrameCompressedSize_advanced(istart, (size_t)(iend-istart), zds->format);
                 if (cSize <= (size_t)(iend-istart)) {
                     /* shortcut : using single-pass mode */
                     size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, (size_t)(oend-op), istart, cSize, ZSTD_getDDict(zds));
                     if (ZSTD_isError(decompressedSize)) return decompressedSize;
-                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()")
+                    DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()");
                     assert(istart != NULL);
                     ip = istart + cSize;
                     op = op ? op + decompressedSize : op; /* can occur if frameContentSize = 0 (empty frame) */
@@ -2161,7 +2210,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
             DEBUGLOG(4, "Consume header");
             FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");
 
-            if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
+            if (zds->format == ZSTD_f_zstd1
+                && (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {  /* skippable frame */
                 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
                 zds->stage = ZSTDds_skipFrame;
             } else {
@@ -2177,11 +2227,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
             zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
             RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
                             frameParameter_windowTooLarge, "");
+            if (zds->maxBlockSizeParam != 0)
+                zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam);
 
             /* Adapt buffer sizes to frame header instructions */
             {   size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
                 size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
-                        ? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
+                        ? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax)
                         : 0;
 
                 ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);
diff --git a/thirdparty/zstd/decompress/zstd_decompress_block.c b/thirdparty/zstd/decompress/zstd_decompress_block.c
index 09896a931e..76d7332e88 100644
--- a/thirdparty/zstd/decompress/zstd_decompress_block.c
+++ b/thirdparty/zstd/decompress/zstd_decompress_block.c
@@ -51,6 +51,13 @@ static void ZSTD_copy4(void* dst, const void* src) { ZSTD_memcpy(dst, src, 4); }
  *   Block decoding
  ***************************************************************/
 
+static size_t ZSTD_blockSizeMax(ZSTD_DCtx const* dctx)
+{
+    size_t const blockSizeMax = dctx->isFrameDecompression ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX;
+    assert(blockSizeMax <= ZSTD_BLOCKSIZE_MAX);
+    return blockSizeMax;
+}
+
 /*! ZSTD_getcBlockSize() :
  *  Provides the size of compressed block from block header `src` */
 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
@@ -73,41 +80,49 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
 static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
     const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
 {
-    if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
-    {
-        /* room for litbuffer to fit without read faulting */
-        dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
+    size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
+    assert(litSize <= blockSizeMax);
+    assert(dctx->isFrameDecompression || streaming == not_streaming);
+    assert(expectedWriteSize <= blockSizeMax);
+    if (streaming == not_streaming && dstCapacity > blockSizeMax + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH) {
+        /* If we aren't streaming, we can just put the literals after the output
+         * of the current block. We don't need to worry about overwriting the
+         * extDict of our window, because it doesn't exist.
+         * So if we have space after the end of the block, just put it there.
+         */
+        dctx->litBuffer = (BYTE*)dst + blockSizeMax + WILDCOPY_OVERLENGTH;
         dctx->litBufferEnd = dctx->litBuffer + litSize;
         dctx->litBufferLocation = ZSTD_in_dst;
-    }
-    else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
-    {
-        /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
+    } else if (litSize <= ZSTD_LITBUFFEREXTRASIZE) {
+        /* Literals fit entirely within the extra buffer, put them there to avoid
+         * having to split the literals.
+         */
+        dctx->litBuffer = dctx->litExtraBuffer;
+        dctx->litBufferEnd = dctx->litBuffer + litSize;
+        dctx->litBufferLocation = ZSTD_not_in_dst;
+    } else {
+        assert(blockSizeMax > ZSTD_LITBUFFEREXTRASIZE);
+        /* Literals must be split between the output block and the extra lit
+         * buffer. We fill the extra lit buffer with the tail of the literals,
+         * and put the rest of the literals at the end of the block, with
+         * WILDCOPY_OVERLENGTH of buffer room to allow for overreads.
+         * This MUST not write more than our maxBlockSize beyond dst, because in
+         * streaming mode, that could overwrite part of our extDict window.
+         */
         if (splitImmediately) {
             /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
             dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
             dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
-        }
-        else {
+        } else {
             /* initially this will be stored entirely in dst during huffman decoding, it will partially be shifted to litExtraBuffer after */
             dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
             dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
         }
         dctx->litBufferLocation = ZSTD_split;
-    }
-    else
-    {
-        /* fits entirely within litExtraBuffer, so no split is necessary */
-        dctx->litBuffer = dctx->litExtraBuffer;
-        dctx->litBufferEnd = dctx->litBuffer + litSize;
-        dctx->litBufferLocation = ZSTD_not_in_dst;
+        assert(dctx->litBufferEnd <= (BYTE*)dst + expectedWriteSize);
     }
 }
 
-/* Hidden declaration for fullbench */
-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                          const void* src, size_t srcSize,
-                          void* dst, size_t dstCapacity, const streaming_operation streaming);
 /*! ZSTD_decodeLiteralsBlock() :
  * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
  * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
@@ -116,7 +131,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
  *
  * @return : nb of bytes read from src (< srcSize )
  *  note : symbol not declared but exposed for fullbench */
-size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
+static size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                           const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
                           void* dst, size_t dstCapacity, const streaming_operation streaming)
 {
@@ -125,6 +140,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
 
     {   const BYTE* const istart = (const BYTE*) src;
         symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3);
+        size_t const blockSizeMax = ZSTD_blockSizeMax(dctx);
 
         switch(litEncType)
         {
@@ -140,7 +156,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 U32 const lhlCode = (istart[0] >> 2) & 3;
                 U32 const lhc = MEM_readLE32(istart);
                 size_t hufSuccess;
-                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
+                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
                 int const flags = 0
                     | (ZSTD_DCtx_get_bmi2(dctx) ? HUF_flags_bmi2 : 0)
                     | (dctx->disableHufAsm ? HUF_flags_disableAsm : 0);
@@ -167,7 +183,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     break;
                 }
                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
-                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
                 if (!singleStream)
                     RETURN_ERROR_IF(litSize < MIN_LITERALS_FOR_4_STREAMS, literals_headerWrong,
                         "Not enough literals (%zu) for the 4-streams mode (min %u)",
@@ -214,10 +230,12 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 }
                 if (dctx->litBufferLocation == ZSTD_split)
                 {
+                    assert(litSize > ZSTD_LITBUFFEREXTRASIZE);
                     ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
                     ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
                     dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
                     dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
+                    assert(dctx->litBufferEnd <= (BYTE*)dst + blockSizeMax);
                 }
 
                 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
@@ -232,7 +250,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
         case set_basic:
             {   size_t litSize, lhSize;
                 U32 const lhlCode = ((istart[0]) >> 2) & 3;
-                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
+                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
                 switch(lhlCode)
                 {
                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
@@ -251,6 +269,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 }
 
                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
+                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
                 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
@@ -279,7 +298,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
         case set_rle:
             {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
                 size_t litSize, lhSize;
-                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
+                size_t expectedWriteSize = MIN(blockSizeMax, dstCapacity);
                 switch(lhlCode)
                 {
                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
@@ -298,7 +317,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     break;
                 }
                 RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
-                RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
+                RETURN_ERROR_IF(litSize > blockSizeMax, corruption_detected, "");
                 RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
                 ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                 if (dctx->litBufferLocation == ZSTD_split)
@@ -320,6 +339,18 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
     }
 }
 
+/* Hidden declaration for fullbench */
+size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize,
+                          void* dst, size_t dstCapacity);
+size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
+                          const void* src, size_t srcSize,
+                          void* dst, size_t dstCapacity)
+{
+    dctx->isFrameDecompression = 0;
+    return ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, not_streaming);
+}
+
 /* Default FSE distribution tables.
  * These are pre-calculated FSE decoding tables using default distributions as defined in specification :
  * https://github.com/facebook/zstd/blob/release/doc/zstd_compression_format.md#default-distributions
@@ -675,11 +706,6 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
 
     /* SeqHead */
     nbSeq = *ip++;
-    if (!nbSeq) {
-        *nbSeqPtr=0;
-        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
-        return 1;
-    }
     if (nbSeq > 0x7F) {
         if (nbSeq == 0xFF) {
             RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
@@ -692,8 +718,16 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
     }
     *nbSeqPtr = nbSeq;
 
+    if (nbSeq == 0) {
+        /* No sequence : section ends immediately */
+        RETURN_ERROR_IF(ip != iend, corruption_detected,
+            "extraneous data present in the Sequences section");
+        return (size_t)(ip - istart);
+    }
+
     /* FSE table descriptors */
     RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, ""); /* minimum possible size: 1 byte for symbol encoding types */
+    RETURN_ERROR_IF(*ip & 3, corruption_detected, ""); /* The last field, Reserved, must be all-zeroes. */
     {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
         symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
         symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
@@ -840,7 +874,7 @@ static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, pt
 /* ZSTD_safecopyDstBeforeSrc():
  * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
  * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
-static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
+static void ZSTD_safecopyDstBeforeSrc(BYTE* op, const BYTE* ip, ptrdiff_t length) {
     ptrdiff_t const diff = op - ip;
     BYTE* const oend = op + length;
 
@@ -869,6 +903,7 @@ static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length
  * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
  */
 FORCE_NOINLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_execSequenceEnd(BYTE* op,
     BYTE* const oend, seq_t sequence,
     const BYTE** litPtr, const BYTE* const litLimit,
@@ -916,6 +951,7 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
  * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
  */
 FORCE_NOINLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
     const BYTE** litPtr, const BYTE* const litLimit,
@@ -961,6 +997,7 @@ size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
 }
 
 HINT_INLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_execSequence(BYTE* op,
     BYTE* const oend, seq_t sequence,
     const BYTE** litPtr, const BYTE* const litLimit,
@@ -1059,6 +1096,7 @@ size_t ZSTD_execSequence(BYTE* op,
 }
 
 HINT_INLINE
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
     BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
     const BYTE** litPtr, const BYTE* const litLimit,
@@ -1181,14 +1219,20 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16
 
 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
 
+/**
+ * ZSTD_decodeSequence():
+ * @p longOffsets : tells the decoder to reload more bit while decoding large offsets
+ *                  only used in 32-bit mode
+ * @return : Sequence (litL + matchL + offset)
+ */
 FORCE_INLINE_TEMPLATE seq_t
-ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const int isLastSeq)
 {
     seq_t seq;
     /*
-     * ZSTD_seqSymbol is a structure with a total of 64 bits wide. So it can be
-     * loaded in one operation and extracted its fields by simply shifting or
-     * bit-extracting on aarch64.
+     * ZSTD_seqSymbol is a 64 bits wide structure.
+     * It can be loaded in one operation
+     * and its fields extracted by simply shifting or bit-extracting on aarch64.
      * GCC doesn't recognize this and generates more unnecessary ldr/ldrb/ldrh
      * operations that cause performance drop. This can be avoided by using this
      * ZSTD_memcpy hack.
@@ -1261,7 +1305,7 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
                 } else {
                     offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
                     {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                        temp -= !temp; /* 0 is not valid: input corrupted => force offset to -1 => corruption detected at execSequence */
                         if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                         seqState->prevOffset[1] = seqState->prevOffset[0];
                         seqState->prevOffset[0] = offset = temp;
@@ -1288,17 +1332,22 @@ ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
         DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                     (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
 
-        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
-        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
-        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
-        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
+        if (!isLastSeq) {
+            /* don't update FSE state for last Sequence */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
+            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
+            BIT_reloadDStream(&seqState->DStream);
+        }
     }
 
     return seq;
 }
 
-#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
-MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+#if DEBUGLEVEL >= 1
+static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
 {
     size_t const windowSize = dctx->fParams.windowSize;
     /* No dictionary used. */
@@ -1312,30 +1361,33 @@ MEM_STATIC int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefix
     /* Dictionary is active. */
     return 1;
 }
+#endif
 
-MEM_STATIC void ZSTD_assertValidSequence(
+static void ZSTD_assertValidSequence(
         ZSTD_DCtx const* dctx,
         BYTE const* op, BYTE const* oend,
         seq_t const seq,
         BYTE const* prefixStart, BYTE const* virtualStart)
 {
 #if DEBUGLEVEL >= 1
-    size_t const windowSize = dctx->fParams.windowSize;
-    size_t const sequenceSize = seq.litLength + seq.matchLength;
-    BYTE const* const oLitEnd = op + seq.litLength;
-    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
-            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
-    assert(op <= oend);
-    assert((size_t)(oend - op) >= sequenceSize);
-    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
-    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
-        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
-        /* Offset must be within the dictionary. */
-        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
-        assert(seq.offset <= windowSize + dictSize);
-    } else {
-        /* Offset must be within our window. */
-        assert(seq.offset <= windowSize);
+    if (dctx->isFrameDecompression) {
+        size_t const windowSize = dctx->fParams.windowSize;
+        size_t const sequenceSize = seq.litLength + seq.matchLength;
+        BYTE const* const oLitEnd = op + seq.litLength;
+        DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
+                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+        assert(op <= oend);
+        assert((size_t)(oend - op) >= sequenceSize);
+        assert(sequenceSize <= ZSTD_blockSizeMax(dctx));
+        if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
+            size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
+            /* Offset must be within the dictionary. */
+            assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
+            assert(seq.offset <= windowSize + dictSize);
+        } else {
+            /* Offset must be within our window. */
+            assert(seq.offset <= windowSize);
+        }
     }
 #else
     (void)dctx, (void)op, (void)oend, (void)seq, (void)prefixStart, (void)virtualStart;
@@ -1351,23 +1403,21 @@ DONT_VECTORIZE
 ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
                                void* dst, size_t maxDstSize,
                          const void* seqStart, size_t seqSize, int nbSeq,
-                         const ZSTD_longOffset_e isLongOffset,
-                         const int frame)
+                         const ZSTD_longOffset_e isLongOffset)
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
     BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + maxDstSize;
+    BYTE* const oend = ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
     const BYTE* litBufferEnd = dctx->litBufferEnd;
     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
     const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
-    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
-    (void)frame;
+    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer (%i seqs)", nbSeq);
 
-    /* Regen sequences */
+    /* Literals are split between internal buffer & output buffer */
     if (nbSeq) {
         seqState_t seqState;
         dctx->fseEntropy = 1;
@@ -1386,8 +1436,7 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
                 BIT_DStream_completed < BIT_DStream_overflow);
 
         /* decompress without overrunning litPtr begins */
-        {
-            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+        {   seq_t sequence = {0,0,0};  /* some static analyzer believe that @sequence is not initialized (it necessarily is, since for(;;) loop as at least one iteration) */
             /* Align the decompression loop to 32 + 16 bytes.
                 *
                 * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
@@ -1449,27 +1498,26 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
 #endif
 
             /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
-            for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
-                size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+            for ( ; nbSeq; nbSeq--) {
+                sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
+                if (litPtr + sequence.litLength > dctx->litBufferEnd) break;
+                {   size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                assert(!ZSTD_isError(oneSeqSize));
-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+                    assert(!ZSTD_isError(oneSeqSize));
+                    ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
 #endif
-                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
-                    return oneSeqSize;
-                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-                op += oneSeqSize;
-                if (UNLIKELY(!--nbSeq))
-                    break;
-                BIT_reloadDStream(&(seqState.DStream));
-                sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-            }
+                    if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                        return oneSeqSize;
+                    DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+                    op += oneSeqSize;
+            }   }
+            DEBUGLOG(6, "reached: (litPtr + sequence.litLength > dctx->litBufferEnd)");
 
             /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
             if (nbSeq > 0) {
                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
-                if (leftoverLit)
-                {
+                DEBUGLOG(6, "There are %i sequences left, and %zu/%zu literals left in buffer", nbSeq, leftoverLit, sequence.litLength);
+                if (leftoverLit) {
                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
                     sequence.litLength -= leftoverLit;
@@ -1478,24 +1526,22 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
                 litPtr = dctx->litExtraBuffer;
                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
                 dctx->litBufferLocation = ZSTD_not_in_dst;
-                {
-                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                     assert(!ZSTD_isError(oneSeqSize));
-                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+                    ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
 #endif
                     if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                         return oneSeqSize;
                     DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                     op += oneSeqSize;
-                    if (--nbSeq)
-                        BIT_reloadDStream(&(seqState.DStream));
                 }
+                nbSeq--;
             }
         }
 
-        if (nbSeq > 0) /* there is remaining lit from extra buffer */
-        {
+        if (nbSeq > 0) {
+            /* there is remaining lit from extra buffer */
 
 #if defined(__GNUC__) && defined(__x86_64__)
             __asm__(".p2align 6");
@@ -1514,35 +1560,34 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
 #  endif
 #endif
 
-            for (; ; ) {
-                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            for ( ; nbSeq ; nbSeq--) {
+                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
                 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                 assert(!ZSTD_isError(oneSeqSize));
-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+                ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
 #endif
                 if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                     return oneSeqSize;
                 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
                 op += oneSeqSize;
-                if (UNLIKELY(!--nbSeq))
-                    break;
-                BIT_reloadDStream(&(seqState.DStream));
             }
         }
 
         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
-        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        DEBUGLOG(5, "bitStream : start=%p, ptr=%p, bitsConsumed=%u", seqState.DStream.start, seqState.DStream.ptr, seqState.DStream.bitsConsumed);
+        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
         /* save reps for next block */
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
     }
 
     /* last literal segment */
-    if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
-    {
-        size_t const lastLLSize = litBufferEnd - litPtr;
+    if (dctx->litBufferLocation == ZSTD_split) {
+        /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
+        size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
+        DEBUGLOG(6, "copy last literals from segment : %u", (U32)lastLLSize);
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
         if (op != NULL) {
             ZSTD_memmove(op, litPtr, lastLLSize);
@@ -1552,15 +1597,17 @@ ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
         litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
         dctx->litBufferLocation = ZSTD_not_in_dst;
     }
-    {   size_t const lastLLSize = litBufferEnd - litPtr;
+    /* copy last literals from internal buffer */
+    {   size_t const lastLLSize = (size_t)(litBufferEnd - litPtr);
+        DEBUGLOG(6, "copy last literals from internal buffer : %u", (U32)lastLLSize);
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
             ZSTD_memcpy(op, litPtr, lastLLSize);
             op += lastLLSize;
-        }
-    }
+    }   }
 
-    return op-ostart;
+    DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
+    return (size_t)(op - ostart);
 }
 
 FORCE_INLINE_TEMPLATE size_t
@@ -1568,13 +1615,12 @@ DONT_VECTORIZE
 ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
     void* dst, size_t maxDstSize,
     const void* seqStart, size_t seqSize, int nbSeq,
-    const ZSTD_longOffset_e isLongOffset,
-    const int frame)
+    const ZSTD_longOffset_e isLongOffset)
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
     BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
+    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ZSTD_maybeNullPtrAdd(ostart, maxDstSize) : dctx->litBuffer;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
     const BYTE* const litEnd = litPtr + dctx->litSize;
@@ -1582,7 +1628,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
     const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
     const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
     DEBUGLOG(5, "ZSTD_decompressSequences_body: nbSeq = %d", nbSeq);
-    (void)frame;
 
     /* Regen sequences */
     if (nbSeq) {
@@ -1597,11 +1642,6 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
         assert(dst != NULL);
 
-        ZSTD_STATIC_ASSERT(
-            BIT_DStream_unfinished < BIT_DStream_completed &&
-            BIT_DStream_endOfBuffer < BIT_DStream_completed &&
-            BIT_DStream_completed < BIT_DStream_overflow);
-
 #if defined(__GNUC__) && defined(__x86_64__)
             __asm__(".p2align 6");
             __asm__("nop");
@@ -1616,73 +1656,70 @@ ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
 #  endif
 #endif
 
-        for ( ; ; ) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+        for ( ; nbSeq ; nbSeq--) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, nbSeq==1);
             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
             assert(!ZSTD_isError(oneSeqSize));
-            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+            ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
 #endif
             if (UNLIKELY(ZSTD_isError(oneSeqSize)))
                 return oneSeqSize;
             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
             op += oneSeqSize;
-            if (UNLIKELY(!--nbSeq))
-                break;
-            BIT_reloadDStream(&(seqState.DStream));
         }
 
         /* check if reached exact end */
-        DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
-        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        assert(nbSeq == 0);
+        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
         /* save reps for next block */
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
     }
 
     /* last literal segment */
-    {   size_t const lastLLSize = litEnd - litPtr;
+    {   size_t const lastLLSize = (size_t)(litEnd - litPtr);
+        DEBUGLOG(6, "copy last literals : %u", (U32)lastLLSize);
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
             ZSTD_memcpy(op, litPtr, lastLLSize);
             op += lastLLSize;
-        }
-    }
+    }   }
 
-    return op-ostart;
+    DEBUGLOG(6, "decoded block of size %u bytes", (U32)(op - ostart));
+    return (size_t)(op - ostart);
 }
 
 static size_t
 ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset,
-                           const int frame)
+                           const ZSTD_longOffset_e isLongOffset)
 {
-    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
 }
 
 static size_t
 ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
                                                void* dst, size_t maxDstSize,
                                          const void* seqStart, size_t seqSize, int nbSeq,
-                                         const ZSTD_longOffset_e isLongOffset,
-                                         const int frame)
+                                         const ZSTD_longOffset_e isLongOffset)
 {
-    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
 
-FORCE_INLINE_TEMPLATE size_t
-ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+FORCE_INLINE_TEMPLATE
+
+size_t ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
                    const BYTE* const prefixStart, const BYTE* const dictEnd)
 {
     prefetchPos += sequence.litLength;
     {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
-        const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
-                                                                              * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+         * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        const BYTE* const match = ZSTD_wrappedPtrSub(ZSTD_wrappedPtrAdd(matchBase, prefetchPos), sequence.offset);
         PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
     }
     return prefetchPos + sequence.matchLength;
@@ -1697,20 +1734,18 @@ ZSTD_decompressSequencesLong_body(
                                ZSTD_DCtx* dctx,
                                void* dst, size_t maxDstSize,
                          const void* seqStart, size_t seqSize, int nbSeq,
-                         const ZSTD_longOffset_e isLongOffset,
-                         const int frame)
+                         const ZSTD_longOffset_e isLongOffset)
 {
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
     BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
+    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ZSTD_maybeNullPtrAdd(ostart, maxDstSize);
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
     const BYTE* litBufferEnd = dctx->litBufferEnd;
     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
     const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
-    (void)frame;
 
     /* Regen sequences */
     if (nbSeq) {
@@ -1735,20 +1770,17 @@ ZSTD_decompressSequencesLong_body(
         ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
 
         /* prepare in advance */
-        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+        for (seqNb=0; seqNb<seqAdvance; seqNb++) {
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
             prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
             sequences[seqNb] = sequence;
         }
-        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
 
         /* decompress without stomping litBuffer */
-        for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
-            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
-            size_t oneSeqSize;
+        for (; seqNb < nbSeq; seqNb++) {
+            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset, seqNb == nbSeq-1);
 
-            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
-            {
+            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd) {
                 /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
                 if (leftoverLit)
@@ -1761,26 +1793,26 @@ ZSTD_decompressSequencesLong_body(
                 litPtr = dctx->litExtraBuffer;
                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
                 dctx->litBufferLocation = ZSTD_not_in_dst;
-                oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-                assert(!ZSTD_isError(oneSeqSize));
-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+                    assert(!ZSTD_isError(oneSeqSize));
+                    ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
-                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+                    if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
 
-                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
-                sequences[seqNb & STORED_SEQS_MASK] = sequence;
-                op += oneSeqSize;
-            }
+                    prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+                    sequences[seqNb & STORED_SEQS_MASK] = sequence;
+                    op += oneSeqSize;
+            }   }
             else
             {
                 /* lit buffer is either wholly contained in first or second split, or not split at all*/
-                oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
+                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
                     ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
                     ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                 assert(!ZSTD_isError(oneSeqSize));
-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+                ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
 
@@ -1789,17 +1821,15 @@ ZSTD_decompressSequencesLong_body(
                 op += oneSeqSize;
             }
         }
-        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
+        RETURN_ERROR_IF(!BIT_endOfDStream(&seqState.DStream), corruption_detected, "");
 
         /* finish queue */
         seqNb -= seqAdvance;
         for ( ; seqNb<nbSeq ; seqNb++) {
             seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
-            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
-            {
+            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd) {
                 const size_t leftoverLit = dctx->litBufferEnd - litPtr;
-                if (leftoverLit)
-                {
+                if (leftoverLit) {
                     RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
                     ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
                     sequence->litLength -= leftoverLit;
@@ -1808,11 +1838,10 @@ ZSTD_decompressSequencesLong_body(
                 litPtr = dctx->litExtraBuffer;
                 litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
                 dctx->litBufferLocation = ZSTD_not_in_dst;
-                {
-                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+                {   size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                     assert(!ZSTD_isError(oneSeqSize));
-                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+                    ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
                     if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
                     op += oneSeqSize;
@@ -1825,7 +1854,7 @@ ZSTD_decompressSequencesLong_body(
                     ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
                 assert(!ZSTD_isError(oneSeqSize));
-                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+                ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
                 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
                 op += oneSeqSize;
@@ -1837,8 +1866,7 @@ ZSTD_decompressSequencesLong_body(
     }
 
     /* last literal segment */
-    if (dctx->litBufferLocation == ZSTD_split)  /* first deplete literal buffer in dst, then copy litExtraBuffer */
-    {
+    if (dctx->litBufferLocation == ZSTD_split) { /* first deplete literal buffer in dst, then copy litExtraBuffer */
         size_t const lastLLSize = litBufferEnd - litPtr;
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
         if (op != NULL) {
@@ -1856,17 +1884,16 @@ ZSTD_decompressSequencesLong_body(
         }
     }
 
-    return op-ostart;
+    return (size_t)(op - ostart);
 }
 
 static size_t
 ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset,
-                           const int frame)
+                           const ZSTD_longOffset_e isLongOffset)
 {
-    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
 
@@ -1880,20 +1907,18 @@ DONT_VECTORIZE
 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset,
-                           const int frame)
+                           const ZSTD_longOffset_e isLongOffset)
 {
-    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
 }
 static BMI2_TARGET_ATTRIBUTE size_t
 DONT_VECTORIZE
 ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset,
-                           const int frame)
+                           const ZSTD_longOffset_e isLongOffset)
 {
-    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
@@ -1902,10 +1927,9 @@ static BMI2_TARGET_ATTRIBUTE size_t
 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
-                           const ZSTD_longOffset_e isLongOffset,
-                           const int frame)
+                           const ZSTD_longOffset_e isLongOffset)
 {
-    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
 
@@ -1915,37 +1939,34 @@ typedef size_t (*ZSTD_decompressSequences_t)(
                             ZSTD_DCtx* dctx,
                             void* dst, size_t maxDstSize,
                             const void* seqStart, size_t seqSize, int nbSeq,
-                            const ZSTD_longOffset_e isLongOffset,
-                            const int frame);
+                            const ZSTD_longOffset_e isLongOffset);
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
 static size_t
 ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
                    const void* seqStart, size_t seqSize, int nbSeq,
-                   const ZSTD_longOffset_e isLongOffset,
-                   const int frame)
+                   const ZSTD_longOffset_e isLongOffset)
 {
     DEBUGLOG(5, "ZSTD_decompressSequences");
 #if DYNAMIC_BMI2
     if (ZSTD_DCtx_get_bmi2(dctx)) {
-        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
     }
 #endif
-    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
 }
 static size_t
 ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
                                  const void* seqStart, size_t seqSize, int nbSeq,
-                                 const ZSTD_longOffset_e isLongOffset,
-                                 const int frame)
+                                 const ZSTD_longOffset_e isLongOffset)
 {
     DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
 #if DYNAMIC_BMI2
     if (ZSTD_DCtx_get_bmi2(dctx)) {
-        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
     }
 #endif
-    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
@@ -1960,16 +1981,15 @@ static size_t
 ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
                              void* dst, size_t maxDstSize,
                              const void* seqStart, size_t seqSize, int nbSeq,
-                             const ZSTD_longOffset_e isLongOffset,
-                             const int frame)
+                             const ZSTD_longOffset_e isLongOffset)
 {
     DEBUGLOG(5, "ZSTD_decompressSequencesLong");
 #if DYNAMIC_BMI2
     if (ZSTD_DCtx_get_bmi2(dctx)) {
-        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
     }
 #endif
-  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+  return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
 
@@ -2051,20 +2071,20 @@ static size_t ZSTD_maxShortOffset(void)
 size_t
 ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                               void* dst, size_t dstCapacity,
-                        const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
+                        const void* src, size_t srcSize, const streaming_operation streaming)
 {   /* blockType == blockCompressed */
     const BYTE* ip = (const BYTE*)src;
-    DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
+    DEBUGLOG(5, "ZSTD_decompressBlock_internal (cSize : %u)", (unsigned)srcSize);
 
     /* Note : the wording of the specification
-     * allows compressed block to be sized exactly ZSTD_BLOCKSIZE_MAX.
+     * allows compressed block to be sized exactly ZSTD_blockSizeMax(dctx).
      * This generally does not happen, as it makes little sense,
      * since an uncompressed block would feature same size and have no decompression cost.
      * Also, note that decoder from reference libzstd before < v1.5.4
      * would consider this edge case as an error.
-     * As a consequence, avoid generating compressed blocks of size ZSTD_BLOCKSIZE_MAX
+     * As a consequence, avoid generating compressed blocks of size ZSTD_blockSizeMax(dctx)
      * for broader compatibility with the deployed ecosystem of zstd decoders */
-    RETURN_ERROR_IF(srcSize > ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
+    RETURN_ERROR_IF(srcSize > ZSTD_blockSizeMax(dctx), srcSize_wrong, "");
 
     /* Decode literals section */
     {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
@@ -2079,8 +2099,8 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
         /* Compute the maximum block size, which must also work when !frame and fParams are unset.
          * Additionally, take the min with dstCapacity to ensure that the totalHistorySize fits in a size_t.
          */
-        size_t const blockSizeMax = MIN(dstCapacity, (frame ? dctx->fParams.blockSizeMax : ZSTD_BLOCKSIZE_MAX));
-        size_t const totalHistorySize = ZSTD_totalHistorySize((BYTE*)dst + blockSizeMax, (BYTE const*)dctx->virtualStart);
+        size_t const blockSizeMax = MIN(dstCapacity, ZSTD_blockSizeMax(dctx));
+        size_t const totalHistorySize = ZSTD_totalHistorySize(ZSTD_maybeNullPtrAdd((BYTE*)dst, blockSizeMax), (BYTE const*)dctx->virtualStart);
         /* isLongOffset must be true if there are long offsets.
          * Offsets are long if they are larger than ZSTD_maxShortOffset().
          * We don't expect that to be the case in 64-bit mode.
@@ -2145,21 +2165,22 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
         {
 #endif
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
 #endif
         }
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
         /* else */
         if (dctx->litBufferLocation == ZSTD_split)
-            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
         else
-            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset);
 #endif
     }
 }
 
 
+ZSTD_ALLOW_POINTER_OVERFLOW_ATTR
 void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst, size_t dstSize)
 {
     if (dst != dctx->previousDstEnd && dstSize > 0) {   /* not contiguous */
@@ -2176,8 +2197,10 @@ size_t ZSTD_decompressBlock_deprecated(ZSTD_DCtx* dctx,
                                  const void* src, size_t srcSize)
 {
     size_t dSize;
+    dctx->isFrameDecompression = 0;
     ZSTD_checkContinuity(dctx, dst, dstCapacity);
-    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
+    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, not_streaming);
+    FORWARD_IF_ERROR(dSize, "");
     dctx->previousDstEnd = (char*)dst + dSize;
     return dSize;
 }
diff --git a/thirdparty/zstd/decompress/zstd_decompress_block.h b/thirdparty/zstd/decompress/zstd_decompress_block.h
index 9d1318882d..ab152404ba 100644
--- a/thirdparty/zstd/decompress/zstd_decompress_block.h
+++ b/thirdparty/zstd/decompress/zstd_decompress_block.h
@@ -47,7 +47,7 @@ typedef enum {
  */
 size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                                void* dst, size_t dstCapacity,
-                         const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
+                         const void* src, size_t srcSize, const streaming_operation streaming);
 
 /* ZSTD_buildFSETable() :
  * generate FSE decoding table for one symbol (ll, ml or off)
diff --git a/thirdparty/zstd/decompress/zstd_decompress_internal.h b/thirdparty/zstd/decompress/zstd_decompress_internal.h
index c2ec5d9fbe..83a7a0115f 100644
--- a/thirdparty/zstd/decompress/zstd_decompress_internal.h
+++ b/thirdparty/zstd/decompress/zstd_decompress_internal.h
@@ -153,6 +153,7 @@ struct ZSTD_DCtx_s
     size_t litSize;
     size_t rleSize;
     size_t staticSize;
+    int isFrameDecompression;
 #if DYNAMIC_BMI2 != 0
     int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
 #endif
@@ -166,6 +167,7 @@ struct ZSTD_DCtx_s
     ZSTD_DDictHashSet* ddictSet;                    /* Hash set for multiple ddicts */
     ZSTD_refMultipleDDicts_e refMultipleDDicts;     /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
     int disableHufAsm;
+    int maxBlockSizeParam;
 
     /* streaming */
     ZSTD_dStreamStage streamStage;
diff --git a/thirdparty/zstd/zstd.h b/thirdparty/zstd/zstd.h
index e5c3f8b68b..5d1fef8a6b 100644
--- a/thirdparty/zstd/zstd.h
+++ b/thirdparty/zstd/zstd.h
@@ -106,7 +106,7 @@ extern "C" {
 /*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
 #define ZSTD_VERSION_MINOR    5
-#define ZSTD_VERSION_RELEASE  5
+#define ZSTD_VERSION_RELEASE  6
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 
 /*! ZSTD_versionNumber() :
@@ -228,7 +228,7 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
  * for example to size a static array on stack.
  * Will produce constant value 0 if srcSize too large.
  */
-#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00LLU : 0xFF00FF00U)
+#define ZSTD_MAX_INPUT_SIZE ((sizeof(size_t)==8) ? 0xFF00FF00FF00FF00ULL : 0xFF00FF00U)
 #define ZSTD_COMPRESSBOUND(srcSize)   (((size_t)(srcSize) >= ZSTD_MAX_INPUT_SIZE) ? 0 : (srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0))  /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */
 ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
 /* ZSTD_isError() :
@@ -249,7 +249,7 @@ ZSTDLIB_API int         ZSTD_defaultCLevel(void);           /*!< default compres
 /*= Compression context
  *  When compressing many times,
  *  it is recommended to allocate a context just once,
- *  and re-use it for each successive compression operation.
+ *  and reuse it for each successive compression operation.
  *  This will make workload friendlier for system's memory.
  *  Note : re-using context is just a speed / resource optimization.
  *         It doesn't change the compression ratio, which remains identical.
@@ -262,9 +262,9 @@ ZSTDLIB_API size_t     ZSTD_freeCCtx(ZSTD_CCtx* cctx);  /* accept NULL pointer *
 
 /*! ZSTD_compressCCtx() :
  *  Same as ZSTD_compress(), using an explicit ZSTD_CCtx.
- *  Important : in order to behave similarly to `ZSTD_compress()`,
- *  this function compresses at requested compression level,
- *  __ignoring any other parameter__ .
+ *  Important : in order to mirror `ZSTD_compress()` behavior,
+ *  this function compresses at the requested compression level,
+ *  __ignoring any other advanced parameter__ .
  *  If any advanced parameter was set using the advanced API,
  *  they will all be reset. Only `compressionLevel` remains.
  */
@@ -276,7 +276,7 @@ ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
 /*= Decompression context
  *  When decompressing many times,
  *  it is recommended to allocate a context only once,
- *  and re-use it for each successive compression operation.
+ *  and reuse it for each successive compression operation.
  *  This will make workload friendlier for system's memory.
  *  Use one context per thread for parallel execution. */
 typedef struct ZSTD_DCtx_s ZSTD_DCtx;
@@ -286,7 +286,7 @@ ZSTDLIB_API size_t     ZSTD_freeDCtx(ZSTD_DCtx* dctx);  /* accept NULL pointer *
 /*! ZSTD_decompressDCtx() :
  *  Same as ZSTD_decompress(),
  *  requires an allocated ZSTD_DCtx.
- *  Compatible with sticky parameters.
+ *  Compatible with sticky parameters (see below).
  */
 ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
                                        void* dst, size_t dstCapacity,
@@ -302,12 +302,12 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
  *   using ZSTD_CCtx_set*() functions.
  *   Pushed parameters are sticky : they are valid for next compressed frame, and any subsequent frame.
  *   "sticky" parameters are applicable to `ZSTD_compress2()` and `ZSTD_compressStream*()` !
- *   __They do not apply to "simple" one-shot variants such as ZSTD_compressCCtx()__ .
+ *   __They do not apply to one-shot variants such as ZSTD_compressCCtx()__ .
  *
  *   It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
  *
  *   This API supersedes all other "advanced" API entry points in the experimental section.
- *   In the future, we expect to remove from experimental API entry points which are redundant with this API.
+ *   In the future, we expect to remove API entry points from experimental which are redundant with this API.
  */
 
 
@@ -390,6 +390,19 @@ typedef enum {
                               * The higher the value of selected strategy, the more complex it is,
                               * resulting in stronger and slower compression.
                               * Special: value 0 means "use default strategy". */
+
+    ZSTD_c_targetCBlockSize=130, /* v1.5.6+
+                                  * Attempts to fit compressed block size into approximatively targetCBlockSize.
+                                  * Bound by ZSTD_TARGETCBLOCKSIZE_MIN and ZSTD_TARGETCBLOCKSIZE_MAX.
+                                  * Note that it's not a guarantee, just a convergence target (default:0).
+                                  * No target when targetCBlockSize == 0.
+                                  * This is helpful in low bandwidth streaming environments to improve end-to-end latency,
+                                  * when a client can make use of partial documents (a prominent example being Chrome).
+                                  * Note: this parameter is stable since v1.5.6.
+                                  * It was present as an experimental parameter in earlier versions,
+                                  * but it's not recommended using it with earlier library versions
+                                  * due to massive performance regressions.
+                                  */
     /* LDM mode parameters */
     ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
                                      * This parameter is designed to improve compression ratio
@@ -469,7 +482,6 @@ typedef enum {
      * ZSTD_c_forceMaxWindow
      * ZSTD_c_forceAttachDict
      * ZSTD_c_literalCompressionMode
-     * ZSTD_c_targetCBlockSize
      * ZSTD_c_srcSizeHint
      * ZSTD_c_enableDedicatedDictSearch
      * ZSTD_c_stableInBuffer
@@ -490,7 +502,7 @@ typedef enum {
      ZSTD_c_experimentalParam3=1000,
      ZSTD_c_experimentalParam4=1001,
      ZSTD_c_experimentalParam5=1002,
-     ZSTD_c_experimentalParam6=1003,
+     /* was ZSTD_c_experimentalParam6=1003; is now ZSTD_c_targetCBlockSize */
      ZSTD_c_experimentalParam7=1004,
      ZSTD_c_experimentalParam8=1005,
      ZSTD_c_experimentalParam9=1006,
@@ -575,6 +587,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset);
 
 /*! ZSTD_compress2() :
  *  Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API.
+ *  (note that this entry point doesn't even expose a compression level parameter).
  *  ZSTD_compress2() always starts a new frame.
  *  Should cctx hold data from a previously unfinished frame, everything about it is forgotten.
  *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
@@ -618,6 +631,7 @@ typedef enum {
      * ZSTD_d_forceIgnoreChecksum
      * ZSTD_d_refMultipleDDicts
      * ZSTD_d_disableHuffmanAssembly
+     * ZSTD_d_maxBlockSize
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly
      */
@@ -625,7 +639,8 @@ typedef enum {
      ZSTD_d_experimentalParam2=1001,
      ZSTD_d_experimentalParam3=1002,
      ZSTD_d_experimentalParam4=1003,
-     ZSTD_d_experimentalParam5=1004
+     ZSTD_d_experimentalParam5=1004,
+     ZSTD_d_experimentalParam6=1005
 
 } ZSTD_dParameter;
 
@@ -680,14 +695,14 @@ typedef struct ZSTD_outBuffer_s {
 *  A ZSTD_CStream object is required to track streaming operation.
 *  Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources.
 *  ZSTD_CStream objects can be reused multiple times on consecutive compression operations.
-*  It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
+*  It is recommended to reuse ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory.
 *
 *  For parallel execution, use one separate ZSTD_CStream per thread.
 *
 *  note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing.
 *
 *  Parameters are sticky : when starting a new compression on the same context,
-*  it will re-use the same sticky parameters as previous compression session.
+*  it will reuse the same sticky parameters as previous compression session.
 *  When in doubt, it's recommended to fully initialize the context before usage.
 *  Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(),
 *  ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to
@@ -776,6 +791,11 @@ typedef enum {
  *            only ZSTD_e_end or ZSTD_e_flush operations are allowed.
  *            Before starting a new compression job, or changing compression parameters,
  *            it is required to fully flush internal buffers.
+ *  - note: if an operation ends with an error, it may leave @cctx in an undefined state.
+ *          Therefore, it's UB to invoke ZSTD_compressStream2() of ZSTD_compressStream() on such a state.
+ *          In order to be re-employed after an error, a state must be reset,
+ *          which can be done explicitly (ZSTD_CCtx_reset()),
+ *          or is sometimes implied by methods starting a new compression job (ZSTD_initCStream(), ZSTD_compressCCtx())
  */
 ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx,
                                          ZSTD_outBuffer* output,
@@ -835,7 +855,7 @@ ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output);
 *
 *  A ZSTD_DStream object is required to track streaming operations.
 *  Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources.
-*  ZSTD_DStream objects can be re-used multiple times.
+*  ZSTD_DStream objects can be reused multiple times.
 *
 *  Use ZSTD_initDStream() to start a new decompression operation.
 * @return : recommended first input size
@@ -889,6 +909,12 @@ ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds);
  * @return : 0 when a frame is completely decoded and fully flushed,
  *           or an error code, which can be tested using ZSTD_isError(),
  *           or any other value > 0, which means there is some decoding or flushing to do to complete current frame.
+ *
+ * Note: when an operation returns with an error code, the @zds state may be left in undefined state.
+ *       It's UB to invoke `ZSTD_decompressStream()` on such a state.
+ *       In order to re-use such a state, it must be first reset,
+ *       which can be done explicitly (`ZSTD_DCtx_reset()`),
+ *       or is implied for operations starting some new decompression job (`ZSTD_initDStream`, `ZSTD_decompressDCtx()`, `ZSTD_decompress_usingDict()`)
  */
 ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input);
 
@@ -1021,7 +1047,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
  *
  * This API allows dictionaries to be used with ZSTD_compress2(),
  * ZSTD_compressStream2(), and ZSTD_decompressDCtx().
- * Dictionaries are sticky, they remain valid when same context is re-used,
+ * Dictionaries are sticky, they remain valid when same context is reused,
  * they only reset when the context is reset
  * with ZSTD_reset_parameters or ZSTD_reset_session_and_parameters.
  * In contrast, Prefixes are single-use.
@@ -1239,7 +1265,7 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN)
 
 /* Advanced parameter bounds */
-#define ZSTD_TARGETCBLOCKSIZE_MIN   64
+#define ZSTD_TARGETCBLOCKSIZE_MIN   1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */
 #define ZSTD_TARGETCBLOCKSIZE_MAX   ZSTD_BLOCKSIZE_MAX
 #define ZSTD_SRCSIZEHINT_MIN        0
 #define ZSTD_SRCSIZEHINT_MAX        INT_MAX
@@ -1527,25 +1553,38 @@ typedef enum {
 ZSTDLIB_STATIC_API size_t ZSTD_sequenceBound(size_t srcSize);
 
 /*! ZSTD_generateSequences() :
+ * WARNING: This function is meant for debugging and informational purposes ONLY!
+ * Its implementation is flawed, and it will be deleted in a future version.
+ * It is not guaranteed to succeed, as there are several cases where it will give
+ * up and fail. You should NOT use this function in production code.
+ *
+ * This function is deprecated, and will be removed in a future version.
+ *
  * Generate sequences using ZSTD_compress2(), given a source buffer.
  *
+ * @param zc The compression context to be used for ZSTD_compress2(). Set any
+ *           compression parameters you need on this context.
+ * @param outSeqs The output sequences buffer of size @p outSeqsSize
+ * @param outSeqsSize The size of the output sequences buffer.
+ *                    ZSTD_sequenceBound(srcSize) is an upper bound on the number
+ *                    of sequences that can be generated.
+ * @param src The source buffer to generate sequences from of size @p srcSize.
+ * @param srcSize The size of the source buffer.
+ *
  * Each block will end with a dummy sequence
  * with offset == 0, matchLength == 0, and litLength == length of last literals.
  * litLength may be == 0, and if so, then the sequence of (of: 0 ml: 0 ll: 0)
  * simply acts as a block delimiter.
  *
- * @zc can be used to insert custom compression params.
- * This function invokes ZSTD_compress2().
- *
- * The output of this function can be fed into ZSTD_compressSequences() with CCtx
- * setting of ZSTD_c_blockDelimiters as ZSTD_sf_explicitBlockDelimiters
- * @return : number of sequences generated
+ * @returns The number of sequences generated, necessarily less than
+ *          ZSTD_sequenceBound(srcSize), or an error code that can be checked
+ *          with ZSTD_isError().
  */
-
+ZSTD_DEPRECATED("For debugging only, will be replaced by ZSTD_extractSequences()")
 ZSTDLIB_STATIC_API size_t
-ZSTD_generateSequences( ZSTD_CCtx* zc,
-                        ZSTD_Sequence* outSeqs, size_t outSeqsSize,
-                        const void* src, size_t srcSize);
+ZSTD_generateSequences(ZSTD_CCtx* zc,
+                       ZSTD_Sequence* outSeqs, size_t outSeqsSize,
+                       const void* src, size_t srcSize);
 
 /*! ZSTD_mergeBlockDelimiters() :
  * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
@@ -1640,56 +1679,59 @@ ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
 /*! ZSTD_estimate*() :
  *  These functions make it possible to estimate memory usage
  *  of a future {D,C}Ctx, before its creation.
+ *  This is useful in combination with ZSTD_initStatic(),
+ *  which makes it possible to employ a static buffer for ZSTD_CCtx* state.
  *
  *  ZSTD_estimateCCtxSize() will provide a memory budget large enough
- *  for any compression level up to selected one.
- *  Note : Unlike ZSTD_estimateCStreamSize*(), this estimate
- *         does not include space for a window buffer.
- *         Therefore, the estimation is only guaranteed for single-shot compressions, not streaming.
+ *  to compress data of any size using one-shot compression ZSTD_compressCCtx() or ZSTD_compress2()
+ *  associated with any compression level up to max specified one.
  *  The estimate will assume the input may be arbitrarily large,
  *  which is the worst case.
  *
+ *  Note that the size estimation is specific for one-shot compression,
+ *  it is not valid for streaming (see ZSTD_estimateCStreamSize*())
+ *  nor other potential ways of using a ZSTD_CCtx* state.
+ *
  *  When srcSize can be bound by a known and rather "small" value,
- *  this fact can be used to provide a tighter estimation
- *  because the CCtx compression context will need less memory.
- *  This tighter estimation can be provided by more advanced functions
+ *  this knowledge can be used to provide a tighter budget estimation
+ *  because the ZSTD_CCtx* state will need less memory for small inputs.
+ *  This tighter estimation can be provided by employing more advanced functions
  *  ZSTD_estimateCCtxSize_usingCParams(), which can be used in tandem with ZSTD_getCParams(),
  *  and ZSTD_estimateCCtxSize_usingCCtxParams(), which can be used in tandem with ZSTD_CCtxParams_setParameter().
  *  Both can be used to estimate memory using custom compression parameters and arbitrary srcSize limits.
  *
  *  Note : only single-threaded compression is supported.
  *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
- *
- *  Note 2 : ZSTD_estimateCCtxSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
- *  Size estimates assume that no external sequence producer is registered.
  */
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int maxCompressionLevel);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
 
 /*! ZSTD_estimateCStreamSize() :
- *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
- *  It will also consider src size to be arbitrarily "large", which is worst case.
+ *  ZSTD_estimateCStreamSize() will provide a memory budget large enough for streaming compression
+ *  using any compression level up to the max specified one.
+ *  It will also consider src size to be arbitrarily "large", which is a worst case scenario.
  *  If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation.
  *  ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel.
  *  ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1.
  *  Note : CStream size estimation is only correct for single-threaded compression.
- *  ZSTD_DStream memory budget depends on window Size.
+ *  ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
+ *  Note 2 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
+ *  Size estimates assume that no external sequence producer is registered.
+ *
+ *  ZSTD_DStream memory budget depends on frame's window Size.
  *  This information can be passed manually, using ZSTD_estimateDStreamSize,
  *  or deducted from a valid frame Header, using ZSTD_estimateDStreamSize_fromFrame();
+ *  Any frame requesting a window size larger than max specified one will be rejected.
  *  Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
  *         an internal ?Dict will be created, which additional size is not estimated here.
  *         In this case, get total size by adding ZSTD_estimate?DictSize
- *  Note 2 : only single-threaded compression is supported.
- *  ZSTD_estimateCStreamSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
- *  Note 3 : ZSTD_estimateCStreamSize* functions are not compatible with the Block-Level Sequence Producer API at this time.
- *  Size estimates assume that no external sequence producer is registered.
  */
-ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int maxCompressionLevel);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
-ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t maxWindowSize);
 ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
 
 /*! ZSTD_estimate?DictSize() :
@@ -1946,11 +1988,6 @@ ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const vo
  */
 #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
 
-/* Tries to fit compressed block size to be around targetCBlockSize.
- * No target when targetCBlockSize == 0.
- * There is no guarantee on compressed block size (default:0) */
-#define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6
-
 /* User's best guess of source size.
  * Hint is not valid when srcSizeHint == 0.
  * There is no guarantee that hint is close to actual source size,
@@ -2430,6 +2467,22 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete
  */
 #define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5
 
+/* ZSTD_d_maxBlockSize
+ * Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
+ * The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
+ *
+ * Forces the decompressor to reject blocks whose content size is
+ * larger than the configured maxBlockSize. When maxBlockSize is
+ * larger than the windowSize, the windowSize is used instead.
+ * This saves memory on the decoder when you know all blocks are small.
+ *
+ * This option is typically used in conjunction with ZSTD_c_maxBlockSize.
+ *
+ * WARNING: This causes the decoder to reject otherwise valid frames
+ * that have block sizes larger than the configured maxBlockSize.
+ */
+#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6
+
 
 /*! ZSTD_DCtx_setFormat() :
  *  This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
@@ -2557,7 +2610,7 @@ size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
  *       explicitly specified.
  *
  *  start a new frame, using same parameters from previous frame.
- *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
+ *  This is typically useful to skip dictionary loading stage, since it will reuse it in-place.
  *  Note that zcs must be init at least once before using ZSTD_resetCStream().
  *  If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
  *  If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
@@ -2633,7 +2686,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const Z
  *
  *     ZSTD_DCtx_reset(zds, ZSTD_reset_session_only);
  *
- * re-use decompression parameters from previous init; saves dictionary loading
+ * reuse decompression parameters from previous init; saves dictionary loading
  */
 ZSTD_DEPRECATED("use ZSTD_DCtx_reset, see zstd.h for detailed instructions")
 ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
@@ -2765,7 +2818,7 @@ ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
 
 #define ZSTD_SEQUENCE_PRODUCER_ERROR ((size_t)(-1))
 
-typedef size_t ZSTD_sequenceProducer_F (
+typedef size_t (*ZSTD_sequenceProducer_F) (
   void* sequenceProducerState,
   ZSTD_Sequence* outSeqs, size_t outSeqsCapacity,
   const void* src, size_t srcSize,
@@ -2797,7 +2850,23 @@ ZSTDLIB_STATIC_API void
 ZSTD_registerSequenceProducer(
   ZSTD_CCtx* cctx,
   void* sequenceProducerState,
-  ZSTD_sequenceProducer_F* sequenceProducer
+  ZSTD_sequenceProducer_F sequenceProducer
+);
+
+/*! ZSTD_CCtxParams_registerSequenceProducer() :
+ * Same as ZSTD_registerSequenceProducer(), but operates on ZSTD_CCtx_params.
+ * This is used for accurate size estimation with ZSTD_estimateCCtxSize_usingCCtxParams(),
+ * which is needed when creating a ZSTD_CCtx with ZSTD_initStaticCCtx().
+ *
+ * If you are using the external sequence producer API in a scenario where ZSTD_initStaticCCtx()
+ * is required, then this function is for you. Otherwise, you probably don't need it.
+ *
+ * See tests/zstreamtest.c for example usage. */
+ZSTDLIB_STATIC_API void
+ZSTD_CCtxParams_registerSequenceProducer(
+  ZSTD_CCtx_params* params,
+  void* sequenceProducerState,
+  ZSTD_sequenceProducer_F sequenceProducer
 );
 
 
@@ -2820,7 +2889,7 @@ ZSTD_registerSequenceProducer(
 
   A ZSTD_CCtx object is required to track streaming operations.
   Use ZSTD_createCCtx() / ZSTD_freeCCtx() to manage resource.
-  ZSTD_CCtx object can be re-used multiple times within successive compression operations.
+  ZSTD_CCtx object can be reused multiple times within successive compression operations.
 
   Start by initializing a context.
   Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
@@ -2841,7 +2910,7 @@ ZSTD_registerSequenceProducer(
   It's possible to use srcSize==0, in which case, it will write a final empty block to end the frame.
   Without last block mark, frames are considered unfinished (hence corrupted) by compliant decoders.
 
-  `ZSTD_CCtx` object can be re-used (ZSTD_compressBegin()) to compress again.
+  `ZSTD_CCtx` object can be reused (ZSTD_compressBegin()) to compress again.
 */
 
 /*=====   Buffer-less streaming compression functions  =====*/
@@ -2873,7 +2942,7 @@ size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_
 
   A ZSTD_DCtx object is required to track streaming operations.
   Use ZSTD_createDCtx() / ZSTD_freeDCtx() to manage it.
-  A ZSTD_DCtx object can be re-used multiple times.
+  A ZSTD_DCtx object can be reused multiple times.
 
   First typical operation is to retrieve frame parameters, using ZSTD_getFrameHeader().
   Frame header is extracted from the beginning of compressed frame, so providing only the frame's beginning is enough.