60 files changed, 25747 insertions, 8 deletions
diff --git a/thirdparty/README.md b/thirdparty/README.md
index f495ed112d..1eb95a1a7c 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -17,6 +17,33 @@ Files extracted from upstream source:
 - `license.txt`
 
 
+## amd-fsr2
+
+- Upstream: https://github.com/GPUOpen-Effects/FidelityFX-FSR2
+- Version: 2.2.1 (1680d1edd5c034f88ebbbb793d8b88f8842cf804, 2023)
+- License: MIT
+
+Files extracted from upstream source:
+
+- `ffx_*.cpp` and `ffx_*.h` from `src/ffx-fsr2-api`
+- `shaders` folder from `src/ffx-fsr2-api` with `ffx_*.hlsl` files excluded
+- `LICENSE.txt`
+
+Apply the patches in the `patches` folder to add the new options required by Godot and to fix general compilation issues.
+
+
+## angle
+
+- Upstream: https://chromium.googlesource.com/angle/angle/
+- Version: git (chromium/5907, 430a4f559cbc2bcd5d026e8b36ee46ddd80e9651, 2023)
+- License: BSD-3-Clause
+
+Files extracted from upstream source:
+
+- `include/*`
+- `LICENSE`
+
+
 ## astcenc
 
 - Upstream: https://github.com/ARM-software/astc-encoder
@@ -243,7 +270,10 @@ Files extracted from upstream source:
 - `LICENSE`
 
 Files generated from [upstream web instance](https://gen.glad.sh/):
+- `EGL/eglplatform.h`
 - `KHR/khrplatform.h`
+- `egl.c`
+- `glad/egl.h`
 - `gl.c`
 - `glad/gl.h`
 - `glx.c`
@@ -252,6 +282,9 @@ Files generated from [upstream web instance](https://gen.glad.sh/):
 See the permalinks in `glad/gl.h` and `glad/glx.h` to regenrate the files with
 a new version of the web instance.
 
+Some changes have been made in order to allow loading OpenGL and OpenGLES APIs at the same time.
+See the patches in the `patches` directory.
+
 
 ## glslang
 
diff --git a/thirdparty/amd-fsr2/LICENSE.txt b/thirdparty/amd-fsr2/LICENSE.txt
new file mode 100644
index 0000000000..c066ae1063
--- /dev/null
+++ b/thirdparty/amd-fsr2/LICENSE.txt
@@ -0,0 +1,21 @@
+FidelityFX Super Resolution 2.2
+=================================
+Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/thirdparty/amd-fsr2/ffx_assert.cpp b/thirdparty/amd-fsr2/ffx_assert.cpp
new file mode 100644
index 0000000000..8a70ad501a
--- /dev/null
+++ b/thirdparty/amd-fsr2/ffx_assert.cpp
@@ -0,0 +1,81 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_assert.h"
+#include <stdlib.h>  // for malloc()
+
+#ifdef _WIN32
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>  // required for OutputDebugString()
+#include <stdio.h>    // required for sprintf_s
+#endif                // #ifdef _WIN32
+
+static FfxAssertCallback s_assertCallback;
+
+// Registers the function that receives formatted assert messages. Passing a null
+// callback makes ffxAssertReport() fall back to OutputDebugStringA() on Windows.
+void ffxAssertSetPrintingCallback(FfxAssertCallback callback)
+{
+    s_assertCallback = callback;
+}
+
+// implementation of assert reporting: formats "<file>(<line>): ASSERTION FAILED. <text>\n" and passes it
+// to the registered callback, or to OutputDebugStringA() when none is set. Only _WIN32 builds emit output.
+bool ffxAssertReport(const char* file, int32_t line, const char* condition, const char* message)
+{
+    if (!file) {
+        return true;
+    }
+
+#ifdef _WIN32
+    // prefer the explicit message, fall back to the condition text; never hand NULL to "%s".
+    const char* text = message ? message : (condition ? condition : "");
+    // measure the final assertion string; a negative length signals a formatting error.
+    const int length = snprintf(nullptr, 0, "%s(%d): ASSERTION FAILED. %s\n", file, line, text);
+    if (length < 0) {
+        return true;
+    }
+
+    const size_t bufferSize = static_cast<size_t>(length) + 1;
+    char*        tempBuf    = static_cast<char*>(malloc(bufferSize));
+    if (!tempBuf) {
+        return true;
+    }
+
+    // form the final assertion string and output to the callback or the TTY.
+    snprintf(tempBuf, bufferSize, "%s(%d): ASSERTION FAILED. %s\n", file, line, text);
+    if (!s_assertCallback) {
+        OutputDebugStringA(tempBuf);
+    } else {
+        s_assertCallback(tempBuf);
+    }
+
+    free(tempBuf);
+#else
+    FFX_UNUSED(line);
+    FFX_UNUSED(condition);
+    FFX_UNUSED(message);
+#endif
+
+    return true;
+}
diff --git a/thirdparty/amd-fsr2/ffx_assert.h b/thirdparty/amd-fsr2/ffx_assert.h
new file mode 100644
index 0000000000..ae32d2a733
--- /dev/null
+++ b/thirdparty/amd-fsr2/ffx_assert.h
@@ -0,0 +1,132 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "ffx_types.h"
+#include "ffx_util.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // #ifdef __cplusplus
+
+#ifdef _DEBUG
+#ifdef _WIN32
+
+#ifdef DISABLE_FFX_DEBUG_BREAK
+#define FFX_DEBUG_BREAK \
+    {                   \
+    }
+#else
+/// Macro to force the debugger to break at this point in the code.
+#define FFX_DEBUG_BREAK __debugbreak();
+#endif
+#else
+#define FFX_DEBUG_BREAK \
+    {                   \
+    }
+#endif
+#else
+// don't allow debug break in release builds.
+#define FFX_DEBUG_BREAK
+#endif
+
+/// A typedef for the callback function for assert printing.
+///
+/// This can be used to re-route printing of assert messages from the FFX backend
+/// to another destination. For example instead of the default behaviour of printing
+/// the assert messages to the debugger's TTY the message can be re-routed to a
+/// MessageBox in a GUI application.
+///
+/// @param [in] message                 The message generated by the assert.
+///
+typedef void (*FfxAssertCallback)(const char* message);
+
+/// Function to report an assert.
+///
+/// @param [in] file                    The name of the file as a string.
+/// @param [in] line                    The index of the line in the file.
+/// @param [in] condition               The boolean condition that was tested.
+/// @param [in] msg                     The optional message to print.
+///
+/// @returns
+/// Always returns true.
+///
+FFX_API bool ffxAssertReport(const char* file, int32_t line, const char* condition, const char* msg);
+
+/// Provides the ability to set a callback for assert messages.
+///
+/// @param [in] callback                The callback function that will receive assert messages.
+///
+FFX_API void ffxAssertSetPrintingCallback(FfxAssertCallback callback);
+
+#ifdef _DEBUG
+/// Standard assert macro.
+#define FFX_ASSERT(condition)                                                      \
+    do                                                                             \
+    {                                                                              \
+        if (!(condition) && ffxAssertReport(__FILE__, __LINE__, #condition, NULL)) \
+            FFX_DEBUG_BREAK                                                        \
+    } while (0)
+
+/// Assert macro with message.
+#define FFX_ASSERT_MESSAGE(condition, msg)                                        \
+    do                                                                            \
+    {                                                                             \
+        if (!(condition) && ffxAssertReport(__FILE__, __LINE__, #condition, msg)) \
+            FFX_DEBUG_BREAK                                                       \
+    } while (0)
+
+/// Assert macro that always fails.
+#define FFX_ASSERT_FAIL(message)                            \
+    do                                                      \
+    {                                                       \
+        ffxAssertReport(__FILE__, __LINE__, NULL, message); \
+        FFX_DEBUG_BREAK                                     \
+    } while (0)
+#else
+// asserts disabled
+#define FFX_ASSERT(condition)  \
+    do                         \
+    {                          \
+        FFX_UNUSED(condition); \
+    } while (0)
+
+#define FFX_ASSERT_MESSAGE(condition, message) \
+    do                                         \
+    {                                          \
+        FFX_UNUSED(condition);                 \
+        FFX_UNUSED(message);                   \
+    } while (0)
+
+#define FFX_ASSERT_FAIL(message) \
+    do                           \
+    {                            \
+        FFX_UNUSED(message);     \
+    } while (0)
+#endif  // #ifdef _DEBUG
+
+/// Simple static assert.
+#define FFX_STATIC_ASSERT(condition) static_assert(condition, #condition)
+
+#ifdef __cplusplus
+}
+#endif  // #ifdef __cplusplus
diff --git a/thirdparty/amd-fsr2/ffx_error.h b/thirdparty/amd-fsr2/ffx_error.h
new file mode 100644
index 0000000000..7ba7d9c4ea
--- /dev/null
+++ b/thirdparty/amd-fsr2/ffx_error.h
@@ -0,0 +1,59 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "ffx_types.h"
+
+/// Typedef for error codes returned from functions in the FidelityFX SDK.
+typedef int32_t FfxErrorCode;
+
+static const FfxErrorCode FFX_OK                            = 0;           ///< The operation completed successfully.
+static const FfxErrorCode FFX_ERROR_INVALID_POINTER         = 0x80000000;  ///< The operation failed due to an invalid pointer.
+static const FfxErrorCode FFX_ERROR_INVALID_ALIGNMENT       = 0x80000001;  ///< The operation failed due to an invalid alignment.
+static const FfxErrorCode FFX_ERROR_INVALID_SIZE            = 0x80000002;  ///< The operation failed due to an invalid size.
+static const FfxErrorCode FFX_EOF                           = 0x80000003;  ///< The end of the file was encountered.
+static const FfxErrorCode FFX_ERROR_INVALID_PATH            = 0x80000004;  ///< The operation failed because the specified path was invalid.
+static const FfxErrorCode FFX_ERROR_EOF                     = 0x80000005;  ///< The operation failed because end of file was reached.
+static const FfxErrorCode FFX_ERROR_MALFORMED_DATA          = 0x80000006;  ///< The operation failed because of some malformed data.
+static const FfxErrorCode FFX_ERROR_OUT_OF_MEMORY           = 0x80000007;  ///< The operation failed because it ran out memory.
+static const FfxErrorCode FFX_ERROR_INCOMPLETE_INTERFACE    = 0x80000008;  ///< The operation failed because the interface was not fully configured.
+static const FfxErrorCode FFX_ERROR_INVALID_ENUM            = 0x80000009;  ///< The operation failed because of an invalid enumeration value.
+static const FfxErrorCode FFX_ERROR_INVALID_ARGUMENT        = 0x8000000a;  ///< The operation failed because an argument was invalid.
+static const FfxErrorCode FFX_ERROR_OUT_OF_RANGE            = 0x8000000b;  ///< The operation failed because a value was out of range.
+static const FfxErrorCode FFX_ERROR_NULL_DEVICE             = 0x8000000c;  ///< The operation failed because a device was null.
+static const FfxErrorCode FFX_ERROR_BACKEND_API_ERROR       = 0x8000000d;  ///< The operation failed because the backend API returned an error code.
+static const FfxErrorCode FFX_ERROR_INSUFFICIENT_MEMORY     = 0x8000000e;  ///< The operation failed because there was not enough memory.
+
+/// Helper macro to return error code y from a function when a specific condition, x, is not met.
+#define FFX_RETURN_ON_ERROR(x, y)                   \
+    if (!(x))                                       \
+    {                                               \
+        return (y);                                 \
+    }
+
+/// Helper macro to return error code x from a function when it is not FFX_OK; x is evaluated exactly once.
+#define FFX_VALIDATE(x)                                                      \
+    {                                                                        \
+        const FfxErrorCode ffxValidateResult = (x);                          \
+        FFX_RETURN_ON_ERROR(ffxValidateResult == FFX_OK, ffxValidateResult); \
+    }
+
diff --git a/thirdparty/amd-fsr2/ffx_fsr2.cpp b/thirdparty/amd-fsr2/ffx_fsr2.cpp
new file mode 100644
index 0000000000..864f7f1294
--- /dev/null
+++ b/thirdparty/amd-fsr2/ffx_fsr2.cpp
@@ -0,0 +1,1373 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include <algorithm>    // for max used inside SPD CPU code.
+#include <cmath>        // for fabs, abs, sinf, sqrt, etc.
+#include <string.h>     // for memset
+#include <cfloat>       // for FLT_EPSILON
+#include "ffx_fsr2.h"
+#define FFX_CPU
+#include "shaders/ffx_core.h"
+#include "shaders/ffx_fsr1.h"
+#include "shaders/ffx_spd.h"
+#include "shaders/ffx_fsr2_callbacks_hlsl.h"
+
+#include "ffx_fsr2_maximum_bias.h"
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif
+
+// -- GODOT start --
+#ifndef _countof
+#define _countof(array) (sizeof(array) / sizeof(array[0]))
+#endif
+
+#ifndef _MSC_VER
+#include <wchar.h>
+#define wcscpy_s wcscpy
+#endif
+// -- GODOT end --
+
+// max queued frames for descriptor management
+static const uint32_t FSR2_MAX_QUEUED_FRAMES = 16;
+
+#include "ffx_fsr2_private.h"
+
+// lists to map shader resource bindpoint name to resource identifier
+typedef struct ResourceBinding
+{
+    uint32_t    index;
+    wchar_t     name[64];
+}ResourceBinding;
+
+static const ResourceBinding srvResourceBindingTable[] =
+{
+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR,                              L"r_input_color_jittered"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY,                        L"r_input_opaque_only"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS,                     L"r_input_motion_vectors"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH,                              L"r_input_depth" },
+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE,                           L"r_input_exposure"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE,                            L"r_auto_exposure"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK,                      L"r_reactive_mask"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK,  L"r_transparency_and_composition_mask"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH,     L"r_reconstructed_previous_nearest_depth"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS,                   L"r_dilated_motion_vectors"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS,          L"r_previous_dilated_motion_vectors"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH,                            L"r_dilatedDepth"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR,                  L"r_internal_upscaled_color"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS,                              L"r_lock_status"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR,                     L"r_prepared_input_color"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY,                             L"r_luma_history" },
+    {FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT,                               L"r_rcas_input"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT,                              L"r_lanczos_lut"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE,                          L"r_imgMips"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE,    L"r_img_mip_shading_change"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5,                 L"r_img_mip_5"},
+    {FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT,                L"r_upsample_maximum_bias_lut"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS,                   L"r_dilated_reactive_masks"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS,                                L"r_new_locks"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA,                          L"r_lock_input_luma"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR,                     L"r_input_prev_color_pre_alpha"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR,                    L"r_input_prev_color_post_alpha"},
+};
+
+static const ResourceBinding uavResourceBindingTable[] =
+{
+    {FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH,    L"rw_reconstructed_previous_nearest_depth"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS,                  L"rw_dilated_motion_vectors"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH,                           L"rw_dilatedDepth"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR,                 L"rw_internal_upscaled_color"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS,                             L"rw_lock_status"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR,                    L"rw_prepared_input_color"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY,                            L"rw_luma_history"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT,                         L"rw_upscaled_output"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE,   L"rw_img_mip_shading_change"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5,                L"rw_img_mip_5"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS,                  L"rw_dilated_reactive_masks"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE,                           L"rw_auto_exposure"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT,                        L"rw_spd_global_atomic"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS,                               L"rw_new_locks"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA,                         L"rw_lock_input_luma"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE,                            L"rw_output_autoreactive"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION,                         L"rw_output_autocomposition"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR,                    L"rw_output_prev_color_pre_alpha"},
+    {FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR,                   L"rw_output_prev_color_post_alpha"},
+};
+
+static const ResourceBinding cbResourceBindingTable[] =
+{
+    {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2,           L"cbFSR2"},
+    {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD,            L"cbSPD"},
+    {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS,           L"cbRCAS"},
+    {FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE,    L"cbGenerateReactive"},
+};
+
+// Broad structure of the root signature.
+typedef enum Fsr2RootSignatureLayout {
+
+    FSR2_ROOT_SIGNATURE_LAYOUT_UAVS,
+    FSR2_ROOT_SIGNATURE_LAYOUT_SRVS,
+    FSR2_ROOT_SIGNATURE_LAYOUT_CONSTANTS,
+    FSR2_ROOT_SIGNATURE_LAYOUT_CONSTANTS_REGISTER_1,
+    FSR2_ROOT_SIGNATURE_LAYOUT_PARAMETER_COUNT
+} Fsr2RootSignatureLayout;
+
+typedef struct Fsr2RcasConstants {
+
+    uint32_t                    rcasConfig[4];
+} FfxRcasConstants;
+
+typedef struct Fsr2SpdConstants {
+
+    uint32_t                    mips;
+    uint32_t                    numworkGroups;
+    uint32_t                    workGroupOffset[2];
+    uint32_t                    renderSize[2];
+} Fsr2SpdConstants;
+
+typedef struct Fsr2GenerateReactiveConstants
+{
+    float       scale;
+    float       threshold;
+    float       binaryValue;
+    uint32_t    flags;
+
+} Fsr2GenerateReactiveConstants;
+
+typedef struct Fsr2GenerateReactiveConstants2
+{
+    float       autoTcThreshold;
+    float       autoTcScale;
+    float       autoReactiveScale;
+    float       autoReactiveMax;
+
+} Fsr2GenerateReactiveConstants2;
+
+typedef union Fsr2SecondaryUnion {
+
+    Fsr2RcasConstants               rcas;
+    Fsr2SpdConstants                spd;
+    Fsr2GenerateReactiveConstants2  autogenReactive;
+} Fsr2SecondaryUnion;
+
+typedef struct Fsr2ResourceDescription {
+
+    uint32_t                    id;
+    const wchar_t*              name;
+    FfxResourceUsage            usage;
+    FfxSurfaceFormat            format;
+    uint32_t                    width;
+    uint32_t                    height;
+    uint32_t                    mipCount;
+    FfxResourceFlags            flags;
+    uint32_t                    initDataSize;
+    void*                       initData;
+} Fsr2ResourceDescription;
+
+FfxConstantBuffer globalFsr2ConstantBuffers[4] = {
+    { sizeof(Fsr2Constants) / sizeof(uint32_t) },
+    { sizeof(Fsr2SpdConstants) / sizeof(uint32_t) },
+    { sizeof(Fsr2RcasConstants) / sizeof(uint32_t) },
+    { sizeof(Fsr2GenerateReactiveConstants) / sizeof(uint32_t) }
+};
+
+// Lanczos-2 window: sinc(x) * sinc(x / 2); returns 1 where |x| is below FFX_EPSILON (the removable singularity at 0).
+static float lanczos2(float value)
+{
+    return fabsf(value) < FFX_EPSILON ? 1.f : (sinf(FFX_PI * value) / (FFX_PI * value)) * (sinf(0.5f * FFX_PI * value) / (0.5f * FFX_PI * value));
+}
+
+// Calculate halton number (radical inverse) for index and base; base is expected to be >= 2.
+static float halton(int32_t index, int32_t base)
+{
+    float f = 1.0f, result = 0.0f;
+
+    for (int32_t currentIndex = index; currentIndex > 0;) {
+        // peel off the least significant digit of the index and mirror it behind the radix point.
+        f /= (float)base;
+        result = result + f * (float)(currentIndex % base);
+        currentIndex /= base;  // exact integer floor for positive operands; a float round-trip corrupts indices above 2^24.
+    }
+
+    return result;
+}
+
+// Debug validation of a dispatch description: every problem found is reported through the
+// context's fpMessage callback as an error or a warning. The function only reports; it returns
+// nothing and does not modify params.
+// NOTE(review): fpMessage is called unconditionally - presumably the caller guarantees it is set; confirm.
+static void fsr2DebugCheckDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params)
+{
+    if (params->commandList == nullptr)
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"commandList is null");
+    }
+
+    if (params->color.resource == nullptr)
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"color resource is null");
+    }
+
+    if (params->depth.resource == nullptr)
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"depth resource is null");
+    }
+
+    if (params->motionVectors.resource == nullptr)
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"motionVectors resource is null");
+    }
+
+    if (params->exposure.resource != nullptr)
+    {
+        if ((context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) == FFX_FSR2_ENABLE_AUTO_EXPOSURE)
+        {
+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"exposure resource provided, however auto exposure flag is present");
+        }
+    }
+
+    if (params->output.resource == nullptr)
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"output resource is null");
+    }
+
+    if (fabs(params->jitterOffset.x) > 1.0f || fabs(params->jitterOffset.y) > 1.0f)
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"jitterOffset contains value outside of expected range [-1.0, 1.0]");
+    }
+
+    if ((params->motionVectorScale.x > (float)context->contextDescription.maxRenderSize.width) ||
+        (params->motionVectorScale.y > (float)context->contextDescription.maxRenderSize.height))
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"motionVectorScale contains scale value greater than maxRenderSize");
+    }
+    if ((params->motionVectorScale.x == 0.0f) ||
+        (params->motionVectorScale.y == 0.0f))
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"motionVectorScale contains zero scale value");
+    }
+
+    if ((params->renderSize.width > context->contextDescription.maxRenderSize.width) ||
+        (params->renderSize.height > context->contextDescription.maxRenderSize.height))
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"renderSize is greater than context maxRenderSize");
+    }
+    if ((params->renderSize.width == 0) ||
+        (params->renderSize.height == 0))
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"renderSize contains zero dimension");
+    }
+
+    if (params->sharpness < 0.0f || params->sharpness > 1.0f)
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"sharpness contains value outside of expected range [0.0, 1.0]");
+    }
+
+    if (params->frameTimeDelta < 1.0f)
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING, L"frameTimeDelta is less than 1.0f - this value should be milliseconds (~16.6f for 60fps)");
+    }
+
+    if (params->preExposure == 0.0f)
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"preExposure provided as 0.0f which is invalid");
+    }
+
+    // the camera plane checks below depend on the depth convention selected by the context flags.
+    bool infiniteDepth = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INFINITE) == FFX_FSR2_ENABLE_DEPTH_INFINITE;
+    bool inverseDepth = (context->contextDescription.flags & FFX_FSR2_ENABLE_DEPTH_INVERTED) == FFX_FSR2_ENABLE_DEPTH_INVERTED;
+
+    if (inverseDepth)
+    {
+        if (params->cameraNear < params->cameraFar)
+        {
+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+                L"FFX_FSR2_ENABLE_DEPTH_INVERTED flag is present yet cameraNear is less than cameraFar");
+        }
+        if (infiniteDepth && params->cameraNear != FLT_MAX)
+        {
+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+                L"FFX_FSR2_ENABLE_DEPTH_INFINITE and FFX_FSR2_ENABLE_DEPTH_INVERTED present, yet cameraNear != FLT_MAX");
+        }
+        // with inverted depth cameraFar is expected to be the smaller value, so the proximity check applies to it.
+        if (params->cameraFar < 0.075f)
+        {
+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+                L"FFX_FSR2_ENABLE_DEPTH_INVERTED present, cameraFar value is very low which may result in depth separation artefacting");
+        }
+    }
+    else
+    {
+        if (params->cameraNear > params->cameraFar)
+        {
+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+                L"cameraNear is greater than cameraFar in non-inverted-depth context");
+        }
+        if (infiniteDepth && params->cameraFar != FLT_MAX)
+        {
+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+                L"FFX_FSR2_ENABLE_DEPTH_INFINITE present without FFX_FSR2_ENABLE_DEPTH_INVERTED, yet cameraFar != FLT_MAX");
+        }
+        if (params->cameraNear < 0.075f)
+        {
+            context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_WARNING,
+                L"cameraNear value is very low which may result in depth separation artefacting");
+        }
+    }
+
+    if (params->cameraFovAngleVertical <= 0.0f)
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is <= 0.0f - this value should be > 0.0f");
+    }
+    if (params->cameraFovAngleVertical > FFX_PI)
+    {
+        context->contextDescription.fpMessage(FFX_FSR2_MESSAGE_TYPE_ERROR, L"cameraFovAngleVertical is greater than 180 degrees/PI");
+    }
+}
+
+// Resolves the resourceIdentifier of every SRV, UAV and constant-buffer binding of
+// inoutPipeline by looking the binding's name up in the matching binding table.
+// Returns FFX_ERROR_INVALID_ARGUMENT as soon as a name has no table entry, FFX_OK otherwise.
+static FfxErrorCode patchResourceBindings(FfxPipelineState* inoutPipeline)
+{
+    // shader resource views
+    for (uint32_t srv = 0; srv < inoutPipeline->srvCount; ++srv)
+    {
+        int32_t entry = 0;
+        while (entry < _countof(srvResourceBindingTable) && 0 != wcscmp(srvResourceBindingTable[entry].name, inoutPipeline->srvResourceBindings[srv].name))
+            ++entry;
+
+        // running off the end of the table means the name is unknown
+        if (entry == _countof(srvResourceBindingTable))
+            return FFX_ERROR_INVALID_ARGUMENT;
+        inoutPipeline->srvResourceBindings[srv].resourceIdentifier = srvResourceBindingTable[entry].index;
+    }
+
+    // unordered access views
+    for (uint32_t uav = 0; uav < inoutPipeline->uavCount; ++uav)
+    {
+        int32_t entry = 0;
+        while (entry < _countof(uavResourceBindingTable) && 0 != wcscmp(uavResourceBindingTable[entry].name, inoutPipeline->uavResourceBindings[uav].name))
+            ++entry;
+
+        // running off the end of the table means the name is unknown
+        if (entry == _countof(uavResourceBindingTable))
+            return FFX_ERROR_INVALID_ARGUMENT;
+        inoutPipeline->uavResourceBindings[uav].resourceIdentifier = uavResourceBindingTable[entry].index;
+    }
+
+    // constant buffers
+    for (uint32_t cb = 0; cb < inoutPipeline->constCount; ++cb)
+    {
+        int32_t entry = 0;
+        while (entry < _countof(cbResourceBindingTable) && 0 != wcscmp(cbResourceBindingTable[entry].name, inoutPipeline->cbResourceBindings[cb].name))
+            ++entry;
+
+        // running off the end of the table means the name is unknown
+        if (entry == _countof(cbResourceBindingTable))
+            return FFX_ERROR_INVALID_ARGUMENT;
+        inoutPipeline->cbResourceBindings[cb].resourceIdentifier = cbResourceBindingTable[entry].index;
+    }
+
+    return FFX_OK;
+}
+
+
+// Creates all FSR2 compute pipelines through the backend's fpCreatePipeline callback and then
+// resolves each pipeline's binding identifiers. Returns the first failing error code, else FFX_OK.
+static FfxErrorCode createPipelineStates(FfxFsr2Context_Private* context)
+{
+    FFX_ASSERT(context);
+    const size_t samplerCount = 2;
+    FfxFilterType samplers[samplerCount];
+    samplers[0] = FFX_FILTER_TYPE_POINT;
+    samplers[1] = FFX_FILTER_TYPE_LINEAR;
+    const size_t rootConstantCount = 2;
+    uint32_t rootConstants[rootConstantCount];
+    rootConstants[0] = sizeof(Fsr2Constants) / sizeof(uint32_t);      // sizes are counted in 32-bit words
+    rootConstants[1] = sizeof(Fsr2SecondaryUnion) / sizeof(uint32_t);
+
+    FfxPipelineDescription pipelineDescription;
+    pipelineDescription.contextFlags = context->contextDescription.flags;
+    pipelineDescription.samplerCount = samplerCount;
+    pipelineDescription.samplers = samplers;
+    pipelineDescription.rootConstantBufferCount = rootConstantCount;
+    pipelineDescription.rootConstantBufferSizes = rootConstants;
+
+    // New interface: will handle RootSignature in backend
+    // set up pipeline descriptor (basically RootSignature and binding)
+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID, &pipelineDescription, &context->pipelineComputeLuminancePyramid));
+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_RCAS, &pipelineDescription, &context->pipelineRCAS));
+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_GENERATE_REACTIVE, &pipelineDescription, &context->pipelineGenerateReactive));
+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_TCR_AUTOGENERATE, &pipelineDescription, &context->pipelineTcrAutogenerate));
+    // the remaining passes are created with a single root constant buffer
+    pipelineDescription.rootConstantBufferCount = 1;
+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_DEPTH_CLIP, &pipelineDescription, &context->pipelineDepthClip));
+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH, &pipelineDescription, &context->pipelineReconstructPreviousDepth));
+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_LOCK, &pipelineDescription, &context->pipelineLock));
+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_ACCUMULATE, &pipelineDescription, &context->pipelineAccumulate));
+    FFX_VALIDATE(context->contextDescription.callbacks.fpCreatePipeline(&context->contextDescription.callbacks, FFX_FSR2_PASS_ACCUMULATE_SHARPEN, &pipelineDescription, &context->pipelineAccumulateSharpen));
+
+    // for each pipeline: re-route/fix-up IDs based on names; an unknown binding name is reported to the caller
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineDepthClip));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineReconstructPreviousDepth));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineLock));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineAccumulate));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineComputeLuminancePyramid));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineAccumulateSharpen));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineRCAS));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineGenerateReactive));
+    FFX_VALIDATE(patchResourceBindings(&context->pipelineTcrAutogenerate));
+
+    return FFX_OK;
+}
+
+static FfxErrorCode generateReactiveMaskInternal(FfxFsr2Context_Private* contextPrivate, const FfxFsr2DispatchDescription* params);
+
+static FfxErrorCode fsr2Create(FfxFsr2Context_Private* context, const FfxFsr2ContextDescription* contextDescription)
+{
+    FFX_ASSERT(context);
+    FFX_ASSERT(contextDescription);
+    // Initialises the private context: copies the description, creates the backend context, queries
+    // device caps, creates the internal resources and the pipelines. Starts from a zeroed context.
+    memset(context, 0, sizeof(FfxFsr2Context_Private));
+    context->device = contextDescription->device;
+
+    memcpy(&context->contextDescription, contextDescription, sizeof(FfxFsr2ContextDescription));
+    // debug checking reports through fpMessage, so the flag is dropped when no callback was supplied
+    if ((context->contextDescription.flags & FFX_FSR2_ENABLE_DEBUG_CHECKING) == FFX_FSR2_ENABLE_DEBUG_CHECKING)
+    {
+        if (context->contextDescription.fpMessage == nullptr)
+        {
+            FFX_ASSERT(context->contextDescription.fpMessage != nullptr);
+            // remove the debug checking flag - we have no message function
+            context->contextDescription.flags &= ~FFX_FSR2_ENABLE_DEBUG_CHECKING;
+        }
+    }
+
+    // Create the device.
+    FfxErrorCode errorCode = context->contextDescription.callbacks.fpCreateBackendContext(&context->contextDescription.callbacks, context->device);
+    FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+    // NOTE(review): the early returns below do not appear to undo fpCreateBackendContext - confirm callers clean up
+    // call out for device caps.
+    errorCode = context->contextDescription.callbacks.fpGetDeviceCapabilities(&context->contextDescription.callbacks, &context->deviceCapabilities, context->device);
+    FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+
+    // set defaults
+    context->firstExecution = true;
+    context->resourceFrameIndex = 0;
+
+    context->constants.displaySize[0] = contextDescription->displaySize.width;
+    context->constants.displaySize[1] = contextDescription->displaySize.height;
+
+    // generate the data for the Lanczos-2 LUT.
+    const uint32_t lanczos2LutWidth = 128;
+    int16_t lanczos2Weights[lanczos2LutWidth] = { };
+
+    for (uint32_t currentLanczosWidthIndex = 0; currentLanczosWidthIndex < lanczos2LutWidth; currentLanczosWidthIndex++) {
+        // x sweeps [0, 2] across the LUT; each weight is stored as round(lanczos2(x) * 32767)
+        const float x = 2.0f * currentLanczosWidthIndex / float(lanczos2LutWidth - 1);
+        const float y = lanczos2(x);
+        lanczos2Weights[currentLanczosWidthIndex] = int16_t(roundf(y * 32767.0f));
+    }
+
+    // upload path only supports R16_SNORM, let's go and convert
+    int16_t maximumBias[FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH * FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT];
+    for (uint32_t i = 0; i < FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH * FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT; ++i) {
+        // each bias value is halved before being scaled by 32767
+        maximumBias[i] = int16_t(roundf(ffxFsr2MaximumBias[i] / 2.0f * 32767.0f));
+    }
+    // initial contents for the small internal resources declared below
+    uint8_t defaultReactiveMaskData = 0U;
+    uint32_t atomicInitData = 0U;
+    float defaultExposure[] = { 0.0f, 0.0f };
+    const FfxResourceType texture1dResourceType = (context->contextDescription.flags & FFX_FSR2_ENABLE_TEXTURE1D_USAGE) ? FFX_RESOURCE_TYPE_TEXTURE1D : FFX_RESOURCE_TYPE_TEXTURE2D;
+
+    // declare internal resources needed
+    const Fsr2ResourceDescription internalSurfaceDesc[] = {
+        // entries look like { id, name, usage, format, width, height, mipCount, flags [, initDataSize, initData] } - TODO confirm against Fsr2ResourceDescription
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR, L"FSR2_PreparedInputColor", FFX_RESOURCE_USAGE_UAV,
+            FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH, L"FSR2_ReconstructedPrevNearestDepth", FFX_RESOURCE_USAGE_UAV,
+            FFX_SURFACE_FORMAT_R32_UINT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1, L"FSR2_InternalDilatedVelocity1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2, L"FSR2_InternalDilatedVelocity2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH, L"FSR2_DilatedDepth", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R32_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
+            
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1, L"FSR2_LockStatus1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2, L"FSR2_LockStatus2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R16G16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA, L"FSR2_LockInputLuma", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS, L"FSR2_NewLocks", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1, L"FSR2_InternalUpscaled1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2, L"FSR2_InternalUpscaled2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE, L"FSR2_ExposureMips", FFX_RESOURCE_USAGE_UAV,
+            FFX_SURFACE_FORMAT_R16_FLOAT, contextDescription->maxRenderSize.width / 2, contextDescription->maxRenderSize.height / 2, 0, FFX_RESOURCE_FLAGS_ALIASABLE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1, L"FSR2_LumaHistory1", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2, L"FSR2_LumaHistory2", (FfxResourceUsage)(FFX_RESOURCE_USAGE_RENDERTARGET | FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R8G8B8A8_UNORM, contextDescription->displaySize.width, contextDescription->displaySize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT, L"FSR2_SpdAtomicCounter", (FfxResourceUsage)(FFX_RESOURCE_USAGE_UAV),
+            FFX_SURFACE_FORMAT_R32_UINT, 1, 1, 1, FFX_RESOURCE_FLAGS_ALIASABLE, sizeof(atomicInitData), &atomicInitData },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS, L"FSR2_DilatedReactiveMasks", FFX_RESOURCE_USAGE_UAV,
+            FFX_SURFACE_FORMAT_R8G8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_ALIASABLE },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT, L"FSR2_LanczosLutData", FFX_RESOURCE_USAGE_READ_ONLY,
+            FFX_SURFACE_FORMAT_R16_SNORM, lanczos2LutWidth, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(lanczos2Weights), lanczos2Weights },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY, L"FSR2_DefaultReactiviyMask", FFX_RESOURCE_USAGE_READ_ONLY,
+            FFX_SURFACE_FORMAT_R8_UNORM, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(defaultReactiveMaskData), &defaultReactiveMaskData },
+
+        {   FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT, L"FSR2_MaximumUpsampleBias", FFX_RESOURCE_USAGE_READ_ONLY,
+            FFX_SURFACE_FORMAT_R16_SNORM, FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH, FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(maximumBias), maximumBias },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE, L"FSR2_DefaultExposure", FFX_RESOURCE_USAGE_READ_ONLY,
+            FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE, sizeof(defaultExposure), defaultExposure },
+
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE, L"FSR2_AutoExposure", FFX_RESOURCE_USAGE_UAV,
+            FFX_SURFACE_FORMAT_R32G32_FLOAT, 1, 1, 1, FFX_RESOURCE_FLAGS_NONE },
+
+
+        // only one for now, will need ping-pong to respect the motion vectors
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE, L"FSR2_AutoReactive", FFX_RESOURCE_USAGE_UAV,
+            FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION, L"FSR2_AutoComposition", FFX_RESOURCE_USAGE_UAV,
+            FFX_SURFACE_FORMAT_R8_UNORM, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1, L"FSR2_PrevPreAlpha0", FFX_RESOURCE_USAGE_UAV,
+            FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1, L"FSR2_PrevPostAlpha0", FFX_RESOURCE_USAGE_UAV,
+            FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2, L"FSR2_PrevPreAlpha1", FFX_RESOURCE_USAGE_UAV,
+            FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+        {   FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2, L"FSR2_PrevPostAlpha1", FFX_RESOURCE_USAGE_UAV,
+            FFX_SURFACE_FORMAT_R11G11B10_FLOAT, contextDescription->maxRenderSize.width, contextDescription->maxRenderSize.height, 1, FFX_RESOURCE_FLAGS_NONE },
+
+    };
+
+    // clear the SRV resources to NULL.
+    memset(context->srvResources, 0, sizeof(context->srvResources));
+
+    for (int32_t currentSurfaceIndex = 0; currentSurfaceIndex < FFX_ARRAY_ELEMENTS(internalSurfaceDesc); ++currentSurfaceIndex) {
+        // surfaces with height <= 1 use texture1dResourceType (1D only when FFX_FSR2_ENABLE_TEXTURE1D_USAGE is set)
+        const Fsr2ResourceDescription* currentSurfaceDescription = &internalSurfaceDesc[currentSurfaceIndex];
+        const FfxResourceType resourceType = currentSurfaceDescription->height > 1 ? FFX_RESOURCE_TYPE_TEXTURE2D : texture1dResourceType;
+        const FfxResourceDescription resourceDescription = { resourceType, currentSurfaceDescription->format, currentSurfaceDescription->width, currentSurfaceDescription->height, 1, currentSurfaceDescription->mipCount };
+        const FfxResourceStates initialState = (currentSurfaceDescription->usage == FFX_RESOURCE_USAGE_READ_ONLY) ? FFX_RESOURCE_STATE_COMPUTE_READ : FFX_RESOURCE_STATE_UNORDERED_ACCESS;
+        const FfxCreateResourceDescription createResourceDescription = { FFX_HEAP_TYPE_DEFAULT, resourceDescription, initialState, currentSurfaceDescription->initDataSize, currentSurfaceDescription->initData, currentSurfaceDescription->name, currentSurfaceDescription->usage, currentSurfaceDescription->id };
+        // the created resource is stored in srvResources at the slot given by the description's id
+        FFX_VALIDATE(context->contextDescription.callbacks.fpCreateResource(&context->contextDescription.callbacks, &createResourceDescription, &context->srvResources[currentSurfaceDescription->id]));
+    }
+
+    // copy resources to uavResources list
+    memcpy(context->uavResources, context->srvResources, sizeof(context->srvResources));
+
+    // avoid compiling pipelines on first render
+    {
+        context->refreshPipelineStates = false;
+        errorCode = createPipelineStates(context);
+        FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+    }
+    return FFX_OK;
+}
+
+static void fsr2SafeReleasePipeline(FfxFsr2Context_Private* context, FfxPipelineState* pipeline)
+{
+    FFX_ASSERT(pipeline);
+    // Releases `pipeline` by handing it to the backend's fpDestroyPipeline callback.
+    context->contextDescription.callbacks.fpDestroyPipeline(&context->contextDescription.callbacks, pipeline);
+}
+
+static void fsr2SafeReleaseResource(FfxFsr2Context_Private* context, FfxResourceInternal resource)
+{   // Releases `resource` via the backend's fpDestroyResource callback; no validity check is done here.
+    context->contextDescription.callbacks.fpDestroyResource(&context->contextDescription.callbacks, resource);
+}
+
+// Tears down the backend context via fpDestroyBackendContext and nulls *device;
+// does nothing when *device is already null.
+static void fsr2SafeReleaseDevice(FfxFsr2Context_Private* context, FfxDevice* device)
+{
+    if (*device != nullptr) {
+        context->contextDescription.callbacks.fpDestroyBackendContext(&context->contextDescription.callbacks);
+        *device = nullptr;
+    }
+}
+
+// Releases everything the context owns: the compute pipelines, the internally created
+// resources and finally the backend context. Always returns FFX_OK.
+static FfxErrorCode fsr2Release(FfxFsr2Context_Private* context)
+{
+    FFX_ASSERT(context);
+
+    FfxPipelineState* const pipelines[] = {
+        &context->pipelineDepthClip, &context->pipelineReconstructPreviousDepth, &context->pipelineLock,
+        &context->pipelineAccumulate, &context->pipelineAccumulateSharpen, &context->pipelineRCAS,
+        &context->pipelineComputeLuminancePyramid, &context->pipelineGenerateReactive, &context->pipelineTcrAutogenerate,
+    };
+    for (FfxPipelineState* pipeline : pipelines) {
+        fsr2SafeReleasePipeline(context, pipeline);
+    }
+
+    // unregister resources not created internally
+    const uint32_t externalSlots[] = {
+        FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY, FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR,
+        FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH, FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS,
+        FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE, FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK,
+        FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK, FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS,
+        FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR, FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT,
+        FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT,
+    };
+    for (uint32_t slot : externalSlots) {
+        context->srvResources[slot] = { FFX_FSR2_RESOURCE_IDENTIFIER_NULL };
+    }
+
+    // release internal resources
+    for (int32_t slot = 0; slot < FFX_FSR2_RESOURCE_IDENTIFIER_COUNT; ++slot) {
+        fsr2SafeReleaseResource(context, context->srvResources[slot]);
+    }
+
+    fsr2SafeReleaseDevice(context, &context->device);
+
+    return FFX_OK;
+}
+
+// Fills context->constants.deviceToViewDepth from the dispatch camera parameters.
+// With the matrix sketched below: [0] = d * c and [1] = e (chosen by the inverted / infinite
+// depth flags), [2] = 1 / a and [3] = 1 / b.
+static void setupDeviceDepthToViewSpaceDepthParams(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params)
+{
+    const auto flags = context->contextDescription.flags;
+    const bool invertedDepth = (flags & FFX_FSR2_ENABLE_DEPTH_INVERTED) == FFX_FSR2_ENABLE_DEPTH_INVERTED;
+    const bool infiniteDepth = (flags & FFX_FSR2_ENABLE_DEPTH_INFINITE) == FFX_FSR2_ENABLE_DEPTH_INFINITE;
+
+    // make sure it has no impact if near and far plane values are swapped in dispatch params
+    // the flags "inverted" and "infinite" will decide what transform to use
+    const float smallerPlane = FFX_MINIMUM(params->cameraNear, params->cameraFar);
+    const float largerPlane = FFX_MAXIMUM(params->cameraNear, params->cameraFar);
+    const float fMin = invertedDepth ? largerPlane : smallerPlane;
+    const float fMax = invertedDepth ? smallerPlane : largerPlane;
+
+    // a 0 0 0   x
+    // 0 b 0 0   y
+    // 0 0 c d   z
+    // 0 0 e 0   1
+
+    const float fQ = fMax / (fMin - fMax);
+    const float d = -1.0f; // for clarity
+
+    // pick matrix elements c and e for the active depth configuration
+    float c;
+    float e;
+    if (!infiniteDepth) {
+        // non infinite, reversed or not
+        c = fQ;
+        e = fQ * fMin;
+    } else if (invertedDepth) {
+        // reversed, infinite
+        c = 0.0f + FLT_EPSILON;
+        e = fMax;
+    } else {
+        // non reversed, infinite
+        c = -1.0f - FLT_EPSILON;
+        e = -fMin - FLT_EPSILON;
+    }
+    context->constants.deviceToViewDepth[0] = d * c;
+    context->constants.deviceToViewDepth[1] = e;
+
+    // revert x and y coords
+    const float aspect = params->renderSize.width / float(params->renderSize.height);
+    const float cotHalfFovY = cosf(0.5f * params->cameraFovAngleVertical) / sinf(0.5f * params->cameraFovAngleVertical);
+    const float xScale = cotHalfFovY / aspect;
+    context->constants.deviceToViewDepth[2] = (1.0f / xScale);
+    context->constants.deviceToViewDepth[3] = (1.0f / cotHalfFovY);
+}
+
+// Builds a compute job for `pipeline` - SRVs, UAVs (with mip selection for the scene-luminance
+// mip identifiers), constant buffers and a dispatchX x dispatchY x 1 grid - and hands it to the
+// backend's fpScheduleGpuJob callback. `params` is not used by this function.
+static void scheduleDispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params, const FfxPipelineState* pipeline, uint32_t dispatchX, uint32_t dispatchY)
+{
+    FfxComputeJobDescription job = {};
+    job.pipeline = *pipeline;
+    job.dimensions[0] = dispatchX;
+    job.dimensions[1] = dispatchY;
+    job.dimensions[2] = 1;
+
+    // shader resource views: resolved through the context's SRV table
+    for (uint32_t srv = 0; srv < pipeline->srvCount; ++srv) {
+        const uint32_t resourceId = pipeline->srvResourceBindings[srv].resourceIdentifier;
+        job.srvs[srv] = context->srvResources[resourceId];
+        wcscpy_s(job.srvNames[srv], pipeline->srvResourceBindings[srv].name);
+    }
+
+    // unordered access views: resolved through the context's UAV table
+    for (uint32_t uav = 0; uav < pipeline->uavCount; ++uav) {
+        const auto& binding = pipeline->uavResourceBindings[uav];
+        const uint32_t resourceId = binding.resourceIdentifier;
+        wcscpy_s(job.uavNames[uav], binding.name);
+        // identifiers in the SCENE_LUMINANCE_MIPMAP_0..12 range map to one mip of the scene luminance resource
+        const bool isLuminanceMip = resourceId >= FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0
+            && resourceId <= FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12;
+        if (isLuminanceMip) {
+            job.uavs[uav] = context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE];
+            job.uavMip[uav] = resourceId - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0;
+        } else {
+            job.uavs[uav] = context->uavResources[resourceId];
+            job.uavMip[uav] = 0;
+        }
+    }
+
+    // constant buffers: contents are taken from globalFsr2ConstantBuffers
+    for (uint32_t cb = 0; cb < pipeline->constCount; ++cb) {
+        const auto& binding = pipeline->cbResourceBindings[cb];
+        wcscpy_s(job.cbNames[cb], binding.name);
+        job.cbs[cb] = globalFsr2ConstantBuffers[binding.resourceIdentifier];
+        job.cbSlotIndex[cb] = binding.slotIndex;
+    }
+
+    FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE };
+    dispatchJob.computeJobDescriptor = job;
+    context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &dispatchJob);
+}
+
+static FfxErrorCode fsr2Dispatch(FfxFsr2Context_Private* context, const FfxFsr2DispatchDescription* params)
+{
+    if ((context->contextDescription.flags & FFX_FSR2_ENABLE_DEBUG_CHECKING) == FFX_FSR2_ENABLE_DEBUG_CHECKING)
+    {
+        fsr2DebugCheckDispatch(context, params);
+    }
+    // take a short cut to the command list
+    FfxCommandList commandList = params->commandList;
+
+    // try and refresh shaders first. Early exit in case of error.
+    if (context->refreshPipelineStates) {
+
+        context->refreshPipelineStates = false;
+
+        const FfxErrorCode errorCode = createPipelineStates(context);
+        FFX_RETURN_ON_ERROR(errorCode == FFX_OK, errorCode);
+    }
+
+    if (context->firstExecution)
+    {
+        FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT };
+
+        const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f };
+        memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float));
+
+        clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1];
+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+        clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2];
+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+        clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR];
+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+    }
+
+    // Prepare per frame descriptor tables
+    const bool isOddFrame = !!(context->resourceFrameIndex & 1);
+    const uint32_t currentCpuOnlyTableBase = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_COUNT : 0;
+    const uint32_t currentGpuTableBase = 2 * FFX_FSR2_RESOURCE_IDENTIFIER_COUNT * context->resourceFrameIndex;
+    const uint32_t lockStatusSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1;
+    const uint32_t lockStatusUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2;
+    const uint32_t upscaledColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1;
+    const uint32_t upscaledColorUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2;
+    const uint32_t dilatedMotionVectorsResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1;
+    const uint32_t previousDilatedMotionVectorsResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1 : FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2;
+    const uint32_t lumaHistorySrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2 : FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1;
+    const uint32_t lumaHistoryUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1 : FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2;
+
+    const uint32_t prevPreAlphaColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1;
+    const uint32_t prevPreAlphaColorUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2;
+    const uint32_t prevPostAlphaColorSrvResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1;
+    const uint32_t prevPostAlphaColorUavResourceIndex = isOddFrame ? FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1 : FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2;
+
+    const bool resetAccumulation = params->reset || context->firstExecution;
+    context->firstExecution = false;
+
+    context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->color, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+    context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->depth, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH]);
+    context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->motionVectors, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS]);
+
+    // if auto exposure is enabled use the auto exposure SRV, otherwise what the app sends.
+    if (context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE) {
+        context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE];
+    } else {
+        if (ffxFsr2ResourceIsNull(params->exposure)) {
+            context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE];
+        } else {
+            context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->exposure, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE]);
+        }
+    }
+ 
+    if (params->enableAutoReactive)
+    {
+        context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->colorOpaqueOnly, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]);
+    }
+    
+    if (ffxFsr2ResourceIsNull(params->reactive)) {
+        context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY];
+    }
+    else {
+        context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->reactive, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]);
+    }
+    
+    if (ffxFsr2ResourceIsNull(params->transparencyAndComposition)) {
+        context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY];
+    } else {
+        context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->transparencyAndComposition, &context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK]);
+    }
+
+    context->contextDescription.callbacks.fpRegisterResource(&context->contextDescription.callbacks, &params->output, &context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT]);
+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS] = context->srvResources[lockStatusSrvResourceIndex];
+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = context->srvResources[upscaledColorSrvResourceIndex];
+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS] = context->uavResources[lockStatusUavResourceIndex];
+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR] = context->uavResources[upscaledColorUavResourceIndex];
+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT] = context->uavResources[upscaledColorUavResourceIndex];
+
+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = context->srvResources[dilatedMotionVectorsResourceIndex];
+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS] = context->uavResources[dilatedMotionVectorsResourceIndex];
+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS] = context->srvResources[previousDilatedMotionVectorsResourceIndex];
+
+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY] = context->uavResources[lumaHistoryUavResourceIndex];
+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY] = context->srvResources[lumaHistorySrvResourceIndex];
+
+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]  = context->srvResources[prevPreAlphaColorSrvResourceIndex];
+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR]  = context->uavResources[prevPreAlphaColorUavResourceIndex];
+    context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR] = context->srvResources[prevPostAlphaColorSrvResourceIndex];
+    context->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR] = context->uavResources[prevPostAlphaColorUavResourceIndex];
+
+    // actual resource size may differ from render/display resolution (e.g. due to Hw/API restrictions), so query the descriptor for UVs adjustment
+    const FfxResourceDescription resourceDescInputColor = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+    const FfxResourceDescription resourceDescLockStatus = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[lockStatusSrvResourceIndex]);
+    const FfxResourceDescription resourceDescReactiveMask = context->contextDescription.callbacks.fpGetResourceDescription(&context->contextDescription.callbacks, context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK]);
+    FFX_ASSERT(resourceDescInputColor.type == FFX_RESOURCE_TYPE_TEXTURE2D);
+    FFX_ASSERT(resourceDescLockStatus.type == FFX_RESOURCE_TYPE_TEXTURE2D);
+
+    context->constants.jitterOffset[0] = params->jitterOffset.x;
+    context->constants.jitterOffset[1] = params->jitterOffset.y;
+    context->constants.renderSize[0] = int32_t(params->renderSize.width ? params->renderSize.width   : resourceDescInputColor.width);
+    context->constants.renderSize[1] = int32_t(params->renderSize.height ? params->renderSize.height : resourceDescInputColor.height);
+    context->constants.maxRenderSize[0] = int32_t(context->contextDescription.maxRenderSize.width);
+    context->constants.maxRenderSize[1] = int32_t(context->contextDescription.maxRenderSize.height);
+    context->constants.inputColorResourceDimensions[0] = resourceDescInputColor.width;
+    context->constants.inputColorResourceDimensions[1] = resourceDescInputColor.height;
+
+    // compute the horizontal FOV for the shader from the vertical one.
+    const float aspectRatio = (float)params->renderSize.width / (float)params->renderSize.height;
+    const float cameraAngleHorizontal = atan(tan(params->cameraFovAngleVertical / 2) * aspectRatio) * 2;
+    context->constants.tanHalfFOV = tanf(cameraAngleHorizontal * 0.5f);
+    context->constants.viewSpaceToMetersFactor = (params->viewSpaceToMetersFactor > 0.0f) ? params->viewSpaceToMetersFactor : 1.0f;
+
+    // compute params to enable device depth to view space depth computation in shader
+    setupDeviceDepthToViewSpaceDepthParams(context, params);
+
+    // To be updated if resource is larger than the actual image size
+    context->constants.downscaleFactor[0] = float(context->constants.renderSize[0]) / context->contextDescription.displaySize.width;
+    context->constants.downscaleFactor[1] = float(context->constants.renderSize[1]) / context->contextDescription.displaySize.height;
+    context->constants.previousFramePreExposure = context->constants.preExposure;
+    context->constants.preExposure = (params->preExposure != 0) ? params->preExposure : 1.0f;
+
+    // motion vector data
+    const int32_t* motionVectorsTargetSize = (context->contextDescription.flags & FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS) ? context->constants.displaySize : context->constants.renderSize;
+
+    context->constants.motionVectorScale[0] = (params->motionVectorScale.x / motionVectorsTargetSize[0]);
+    context->constants.motionVectorScale[1] = (params->motionVectorScale.y / motionVectorsTargetSize[1]);
+
+    // compute jitter cancellation
+    if (context->contextDescription.flags & FFX_FSR2_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION) {
+
+        context->constants.motionVectorJitterCancellation[0] = (context->previousJitterOffset[0] - context->constants.jitterOffset[0]) / motionVectorsTargetSize[0];
+        context->constants.motionVectorJitterCancellation[1] = (context->previousJitterOffset[1] - context->constants.jitterOffset[1]) / motionVectorsTargetSize[1];
+
+        context->previousJitterOffset[0] = context->constants.jitterOffset[0];
+        context->previousJitterOffset[1] = context->constants.jitterOffset[1];
+    }
+
+    // lock data, assuming jitter sequence length computation for now
+    const int32_t jitterPhaseCount = ffxFsr2GetJitterPhaseCount(params->renderSize.width, context->contextDescription.displaySize.width);
+
+    // init on first frame
+    if (resetAccumulation || context->constants.jitterPhaseCount == 0) {
+        context->constants.jitterPhaseCount = (float)jitterPhaseCount;
+    } else {
+        const int32_t jitterPhaseCountDelta = (int32_t)(jitterPhaseCount - context->constants.jitterPhaseCount);
+        if (jitterPhaseCountDelta > 0) {
+            context->constants.jitterPhaseCount++;
+        } else if (jitterPhaseCountDelta < 0) {
+            context->constants.jitterPhaseCount--;
+        }
+    }
+
+    // convert delta time to seconds and clamp to [0, 1].
+    context->constants.deltaTime = FFX_MAXIMUM(0.0f, FFX_MINIMUM(1.0f, params->frameTimeDelta / 1000.0f));
+
+    if (resetAccumulation) {
+        context->constants.frameIndex = 0;
+    } else {
+        context->constants.frameIndex++;
+    }
+
+    // shading change usage of the SPD mip levels.
+    context->constants.lumaMipLevelToUse = uint32_t(FFX_FSR2_SHADING_CHANGE_MIP_LEVEL);
+
+    const float mipDiv = float(2 << context->constants.lumaMipLevelToUse);
+    context->constants.lumaMipDimensions[0] = uint32_t(context->constants.maxRenderSize[0] / mipDiv);
+    context->constants.lumaMipDimensions[1] = uint32_t(context->constants.maxRenderSize[1] / mipDiv);
+
+	// -- GODOT start --
+    memcpy(context->constants.reprojectionMatrix, params->reprojectionMatrix, sizeof(context->constants.reprojectionMatrix));
+	// -- GODOT end --
+
+    // reactive mask bias
+    const int32_t threadGroupWorkRegionDim = 8;
+    const int32_t dispatchSrcX = (context->constants.renderSize[0] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+    const int32_t dispatchSrcY = (context->constants.renderSize[1] + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+    const int32_t dispatchDstX = (context->contextDescription.displaySize.width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+    const int32_t dispatchDstY = (context->contextDescription.displaySize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+
+    // Clear reconstructed depth for max depth store.
+    if (resetAccumulation) {
+
+        FfxGpuJobDescription clearJob = { FFX_GPU_JOB_CLEAR_FLOAT };
+
+        // LockStatus resource has no sign bit, callback functions are compensating for this.
+        // Clearing the resource must follow the same logic.
+        float clearValuesLockStatus[4]{};
+        clearValuesLockStatus[LOCK_LIFETIME_REMAINING] = 0.0f;
+        clearValuesLockStatus[LOCK_TEMPORAL_LUMA] = 0.0f;
+
+        memcpy(clearJob.clearJobDescriptor.color, clearValuesLockStatus, 4 * sizeof(float));
+        clearJob.clearJobDescriptor.target = context->srvResources[lockStatusSrvResourceIndex];
+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+
+        const float clearValuesToZeroFloat[]{ 0.f, 0.f, 0.f, 0.f };
+        memcpy(clearJob.clearJobDescriptor.color, clearValuesToZeroFloat, 4 * sizeof(float));
+        clearJob.clearJobDescriptor.target = context->srvResources[upscaledColorSrvResourceIndex];
+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+
+        clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE];
+        context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+
+        //if (context->contextDescription.flags & FFX_FSR2_ENABLE_AUTO_EXPOSURE)
+        // Auto exposure always used to track luma changes in locking logic
+        {
+            const float clearValuesExposure[]{ -1.f, 1e8f, 0.f, 0.f };
+            memcpy(clearJob.clearJobDescriptor.color, clearValuesExposure, 4 * sizeof(float));
+            clearJob.clearJobDescriptor.target = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE];
+            context->contextDescription.callbacks.fpScheduleGpuJob(&context->contextDescription.callbacks, &clearJob);
+        }
+    }
+
+    // Auto exposure
+    uint32_t dispatchThreadGroupCountXY[2];
+    uint32_t workGroupOffset[2];
+    uint32_t numWorkGroupsAndMips[2];
+    uint32_t rectInfo[4] = { 0, 0, params->renderSize.width, params->renderSize.height };
+    SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo);
+
+    // downsample
+    Fsr2SpdConstants luminancePyramidConstants;
+    luminancePyramidConstants.numworkGroups = numWorkGroupsAndMips[0];
+    luminancePyramidConstants.mips = numWorkGroupsAndMips[1];
+    luminancePyramidConstants.workGroupOffset[0] = workGroupOffset[0];
+    luminancePyramidConstants.workGroupOffset[1] = workGroupOffset[1];
+    luminancePyramidConstants.renderSize[0] = params->renderSize.width;
+    luminancePyramidConstants.renderSize[1] = params->renderSize.height;
+
+    // compute the constants.
+    Fsr2RcasConstants rcasConsts = {};
+    const float sharpenessRemapped = (-2.0f * params->sharpness) + 2.0f;
+    FsrRcasCon(rcasConsts.rcasConfig, sharpenessRemapped);
+
+    Fsr2GenerateReactiveConstants2 genReactiveConsts = {};
+    genReactiveConsts.autoTcThreshold = params->autoTcThreshold;
+    genReactiveConsts.autoTcScale = params->autoTcScale;
+    genReactiveConsts.autoReactiveScale = params->autoReactiveScale;
+    genReactiveConsts.autoReactiveMax = params->autoReactiveMax;
+
+    // initialize constantBuffers data
+    memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].data,        &context->constants,        globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2].uint32Size * sizeof(uint32_t));
+    memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].data,         &luminancePyramidConstants, globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD].uint32Size  * sizeof(uint32_t));
+    memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].data,        &rcasConsts,                globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS].uint32Size * sizeof(uint32_t));
+    memcpy(&globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE].data, &genReactiveConsts,         globalFsr2ConstantBuffers[FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE].uint32Size * sizeof(uint32_t));
+
+    // Auto reactive
+    if (params->enableAutoReactive)
+    {
+        generateReactiveMaskInternal(context, params);
+        context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
+        context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK] = context->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION];
+    }
+    scheduleDispatch(context, params, &context->pipelineComputeLuminancePyramid, dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1]);
+    scheduleDispatch(context, params, &context->pipelineReconstructPreviousDepth, dispatchSrcX, dispatchSrcY);
+    scheduleDispatch(context, params, &context->pipelineDepthClip, dispatchSrcX, dispatchSrcY);
+
+    const bool sharpenEnabled = params->enableSharpening;
+
+    scheduleDispatch(context, params, &context->pipelineLock, dispatchSrcX, dispatchSrcY);
+    scheduleDispatch(context, params, sharpenEnabled ? &context->pipelineAccumulateSharpen : &context->pipelineAccumulate, dispatchDstX, dispatchDstY);
+
+    // RCAS
+    if (sharpenEnabled) {
+
+        // dispatch RCAS
+        const int32_t threadGroupWorkRegionDimRCAS = 16;
+        const int32_t dispatchX = (context->contextDescription.displaySize.width + (threadGroupWorkRegionDimRCAS - 1)) / threadGroupWorkRegionDimRCAS;
+        const int32_t dispatchY = (context->contextDescription.displaySize.height + (threadGroupWorkRegionDimRCAS - 1)) / threadGroupWorkRegionDimRCAS;
+        scheduleDispatch(context, params, &context->pipelineRCAS, dispatchX, dispatchY);
+    }
+
+    context->resourceFrameIndex = (context->resourceFrameIndex + 1) % FSR2_MAX_QUEUED_FRAMES;
+
+    // Fsr2MaxQueuedFrames must be an even number.
+    FFX_STATIC_ASSERT((FSR2_MAX_QUEUED_FRAMES & 1) == 0);
+
+    context->contextDescription.callbacks.fpExecuteGpuJobs(&context->contextDescription.callbacks, commandList);
+
+    // release dynamic resources
+    context->contextDescription.callbacks.fpUnregisterResources(&context->contextDescription.callbacks);
+
+    return FFX_OK;
+}
+
+// Create an FSR2 context inside the caller-provided storage.
+//
+// context            - storage for the context; zeroed, then handed to fsr2Create as the private context.
+// contextDescription - creation parameters, including the backend callback interface.
+//
+// Returns FFX_ERROR_INVALID_POINTER when either pointer is null,
+// FFX_ERROR_INCOMPLETE_INTERFACE when a required callback (or the scratch buffer size) is missing,
+// and otherwise the result of fsr2Create.
+FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2ContextDescription* contextDescription)
+{
+    // the context pointer must be validated before it is written to;
+    // zeroing it first would dereference a null pointer instead of reporting the error.
+    FFX_RETURN_ON_ERROR(
+        context,
+        FFX_ERROR_INVALID_POINTER);
+
+    // zero context memory
+    memset(context, 0, sizeof(FfxFsr2Context));
+
+    // the context is left zeroed even when the description turns out to be invalid.
+    FFX_RETURN_ON_ERROR(
+        contextDescription,
+        FFX_ERROR_INVALID_POINTER);
+
+    // validate that the callbacks needed for creation are set for the interface
+    FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpGetDeviceCapabilities, FFX_ERROR_INCOMPLETE_INTERFACE);
+    FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpCreateBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+    FFX_RETURN_ON_ERROR(contextDescription->callbacks.fpDestroyBackendContext, FFX_ERROR_INCOMPLETE_INTERFACE);
+
+    // if a scratch buffer is declared, then we must have a size
+    if (contextDescription->callbacks.scratchBuffer) {
+
+        FFX_RETURN_ON_ERROR(contextDescription->callbacks.scratchBufferSize, FFX_ERROR_INCOMPLETE_INTERFACE);
+    }
+
+    // ensure the context is large enough for the internal context.
+    FFX_STATIC_ASSERT(sizeof(FfxFsr2Context) >= sizeof(FfxFsr2Context_Private));
+
+    // create the context.
+    FfxFsr2Context_Private* contextPrivate = (FfxFsr2Context_Private*)(context);
+    const FfxErrorCode errorCode = fsr2Create(contextPrivate, contextDescription);
+
+    return errorCode;
+}
+
+// Destroy an FSR2 context.
+//
+// Returns FFX_ERROR_INVALID_POINTER when context is null; otherwise the result of fsr2Release.
+FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* context)
+{
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+
+    // the public context is reinterpreted as the private implementation and released.
+    FfxFsr2Context_Private* privateContext = (FfxFsr2Context_Private*)(context);
+    return fsr2Release(privateContext);
+}
+
+// Validate the dispatch description and run the FSR2 passes.
+//
+// Returns FFX_ERROR_INVALID_POINTER for a null context or description,
+// FFX_ERROR_OUT_OF_RANGE when the requested render size exceeds the context's maxRenderSize
+// in either dimension, FFX_ERROR_NULL_DEVICE when the context has no device,
+// and otherwise the result of fsr2Dispatch.
+FfxErrorCode ffxFsr2ContextDispatch(FfxFsr2Context* context, const FfxFsr2DispatchDescription* dispatchParams)
+{
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(dispatchParams, FFX_ERROR_INVALID_POINTER);
+
+    FfxFsr2Context_Private* privateContext = (FfxFsr2Context_Private*)(context);
+
+    // the render size may not exceed the maximum the context was created with.
+    FFX_RETURN_ON_ERROR(
+        dispatchParams->renderSize.width <= privateContext->contextDescription.maxRenderSize.width,
+        FFX_ERROR_OUT_OF_RANGE);
+    FFX_RETURN_ON_ERROR(
+        dispatchParams->renderSize.height <= privateContext->contextDescription.maxRenderSize.height,
+        FFX_ERROR_OUT_OF_RANGE);
+
+    // a device is required before any pass can be dispatched.
+    FFX_RETURN_ON_ERROR(privateContext->device, FFX_ERROR_NULL_DEVICE);
+
+    return fsr2Dispatch(privateContext, dispatchParams);
+}
+
+// Map a quality mode to its per-dimension upscale ratio.
+//
+// Returns 1.5, 1.7, 2.0 or 3.0 for the quality, balanced, performance and
+// ultra-performance modes respectively, and 0.0 for any other value.
+float ffxFsr2GetUpscaleRatioFromQualityMode(FfxFsr2QualityMode qualityMode)
+{
+    if (qualityMode == FFX_FSR2_QUALITY_MODE_QUALITY) {
+        return 1.5f;
+    }
+    if (qualityMode == FFX_FSR2_QUALITY_MODE_BALANCED) {
+        return 1.7f;
+    }
+    if (qualityMode == FFX_FSR2_QUALITY_MODE_PERFORMANCE) {
+        return 2.0f;
+    }
+    if (qualityMode == FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE) {
+        return 3.0f;
+    }
+
+    // unrecognised quality mode.
+    return 0.0f;
+}
+
+// Compute the render resolution for a display resolution and quality mode.
+//
+// Each display dimension is divided by the ratio from ffxFsr2GetUpscaleRatioFromQualityMode
+// and truncated to an unsigned integer.
+//
+// Returns FFX_ERROR_INVALID_POINTER when an output pointer is null,
+// FFX_ERROR_INVALID_ENUM when qualityMode lies outside
+// [FFX_FSR2_QUALITY_MODE_QUALITY, FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE], and FFX_OK otherwise.
+FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode(
+    uint32_t* renderWidth,
+    uint32_t* renderHeight,
+    uint32_t displayWidth,
+    uint32_t displayHeight,
+    FfxFsr2QualityMode qualityMode)
+{
+    FFX_RETURN_ON_ERROR(renderWidth, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(renderHeight, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(
+        !(qualityMode < FFX_FSR2_QUALITY_MODE_QUALITY || qualityMode > FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE),
+        FFX_ERROR_INVALID_ENUM);
+
+    // the same predefined ratio applies to both dimensions.
+    const float upscaleRatio = ffxFsr2GetUpscaleRatioFromQualityMode(qualityMode);
+    const uint32_t widthAtRenderScale = (uint32_t)((float)displayWidth / upscaleRatio);
+    const uint32_t heightAtRenderScale = (uint32_t)((float)displayHeight / upscaleRatio);
+
+    *renderWidth = widthAtRenderScale;
+    *renderHeight = heightAtRenderScale;
+    return FFX_OK;
+}
+
+// Flag the context so that its pipeline states are recreated later.
+//
+// Only sets refreshPipelineStates; the flag is consumed by the code paths that check it
+// and call createPipelineStates.
+//
+// Returns FFX_ERROR_INVALID_POINTER when context is null, FFX_OK otherwise.
+FfxErrorCode ffxFsr2ContextEnqueueRefreshPipelineRequest(FfxFsr2Context* context)
+{
+    FFX_RETURN_ON_ERROR(context, FFX_ERROR_INVALID_POINTER);
+
+    ((FfxFsr2Context_Private*)context)->refreshPipelineStates = true;
+    return FFX_OK;
+}
+
+// Compute the jitter phase count for a render/display width pair.
+//
+// The result is 8 * (displayWidth / renderWidth)^2, truncated to an integer.
+// NOTE(review): renderWidth is used as a divisor without a zero check.
+int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth)
+{
+    const float phasesAtNativeScale = 8.0f;
+    return int32_t(phasesAtNativeScale * pow((float(displayWidth) / renderWidth), 2.0f));
+}
+
+// Compute the jitter offset for a given position in the jitter sequence.
+//
+// The sequence position is (index % phaseCount) + 1; the X offset uses halton base 2 and
+// the Y offset halton base 3, each shifted by -0.5.
+//
+// Returns FFX_ERROR_INVALID_POINTER when an output pointer is null,
+// FFX_ERROR_INVALID_ARGUMENT when phaseCount is not positive, and FFX_OK otherwise.
+FfxErrorCode ffxFsr2GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount)
+{
+    FFX_RETURN_ON_ERROR(outX, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(outY, FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(phaseCount > 0, FFX_ERROR_INVALID_ARGUMENT);
+
+    // both axes sample the same sequence position, in different bases.
+    const int32_t sequencePosition = (index % phaseCount) + 1;
+    const float offsetX = halton(sequencePosition, 2) - 0.5f;
+    const float offsetY = halton(sequencePosition, 3) - 0.5f;
+
+    *outX = offsetX;
+    *outY = offsetY;
+    return FFX_OK;
+}
+
+// Report whether an FfxResource has no underlying resource, i.e. its resource member is NULL.
+FFX_API bool ffxFsr2ResourceIsNull(FfxResource resource)
+{
+    const bool hasNoUnderlyingResource = (NULL == resource.resource);
+    return hasNoUnderlyingResource;
+}
+
+// Schedule and execute the reactive-mask generation pass.
+//
+// context - public FSR2 context; reinterpreted as FfxFsr2Context_Private.
+// params  - the inputs (colorOpaqueOnly, colorPreUpscale), the output (outReactive), the render size,
+//           the shader constants (scale, cutoffThreshold, binaryValue, flags) and the command list
+//           the job is executed on.
+//
+// The AUTOREACTIVE UAV slot of the context is temporarily bound to params->outReactive and
+// restored to its previous value before returning.
+//
+// Returns FFX_ERROR_INVALID_POINTER when context, params or params->commandList is null,
+// FFX_ERROR_NULL_DEVICE when the context has no device, and FFX_OK otherwise.
+FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const FfxFsr2GenerateReactiveDescription* params)
+{
+    FFX_RETURN_ON_ERROR(
+        context,
+        FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(
+        params,
+        FFX_ERROR_INVALID_POINTER);
+    FFX_RETURN_ON_ERROR(
+        params->commandList,
+        FFX_ERROR_INVALID_POINTER);
+
+    FfxFsr2Context_Private* contextPrivate = (FfxFsr2Context_Private*)(context);
+
+    FFX_RETURN_ON_ERROR(
+        contextPrivate->device,
+        FFX_ERROR_NULL_DEVICE);
+
+    // honour a pending pipeline refresh request before using any pipeline.
+    if (contextPrivate->refreshPipelineStates) {
+
+        createPipelineStates(contextPrivate);
+        contextPrivate->refreshPipelineStates = false;
+    }
+
+    // take a short cut to the command list
+    FfxCommandList commandList = params->commandList;
+
+    FfxPipelineState* pipeline = &contextPrivate->pipelineGenerateReactive;
+
+    // one thread group covers an 8x8 region; round the group counts up.
+    const int32_t threadGroupWorkRegionDim = 8;
+    const int32_t dispatchSrcX = (params->renderSize.width  + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+    const int32_t dispatchSrcY = (params->renderSize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+
+    // save internal reactive resource
+    FfxResourceInternal internalReactive = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
+
+    // register the caller's resources; the output overwrites the AUTOREACTIVE UAV slot saved above.
+    FfxComputeJobDescription jobDescriptor = {};
+    contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->colorOpaqueOnly, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]);
+    contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->colorPreUpscale, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+    contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->outReactive, &contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE]);
+
+    jobDescriptor.uavs[0] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
+
+    wcscpy_s(jobDescriptor.srvNames[0], pipeline->srvResourceBindings[0].name);
+    wcscpy_s(jobDescriptor.srvNames[1], pipeline->srvResourceBindings[1].name);
+    wcscpy_s(jobDescriptor.uavNames[0], pipeline->uavResourceBindings[0].name);
+
+    jobDescriptor.dimensions[0] = dispatchSrcX;
+    jobDescriptor.dimensions[1] = dispatchSrcY;
+    jobDescriptor.dimensions[2] = 1;
+    jobDescriptor.pipeline = *pipeline;
+
+    // resolve every SRV binding of the pipeline to the resource currently held in the context.
+    for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvCount; ++currentShaderResourceViewIndex) {
+
+        const uint32_t currentResourceId = pipeline->srvResourceBindings[currentShaderResourceViewIndex].resourceIdentifier;
+        const FfxResourceInternal currentResource = contextPrivate->srvResources[currentResourceId];
+        jobDescriptor.srvs[currentShaderResourceViewIndex] = currentResource;
+        wcscpy_s(jobDescriptor.srvNames[currentShaderResourceViewIndex], pipeline->srvResourceBindings[currentShaderResourceViewIndex].name);
+    }
+
+    Fsr2GenerateReactiveConstants constants = {};
+    constants.scale = params->scale;
+    constants.threshold = params->cutoffThreshold;
+    constants.binaryValue = params->binaryValue;
+    constants.flags = params->flags;
+
+    // uint32Size counts 32-bit values, not bytes (the dispatch path multiplies it by
+    // sizeof(uint32_t) to obtain a byte size), so convert the struct size accordingly.
+    jobDescriptor.cbs[0].uint32Size = sizeof(constants) / sizeof(uint32_t);
+    memcpy(&jobDescriptor.cbs[0].data, &constants, sizeof(constants));
+    wcscpy_s(jobDescriptor.cbNames[0], pipeline->cbResourceBindings[0].name);
+
+    FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE };
+    dispatchJob.computeJobDescriptor = jobDescriptor;
+
+    contextPrivate->contextDescription.callbacks.fpScheduleGpuJob(&contextPrivate->contextDescription.callbacks, &dispatchJob);
+
+    contextPrivate->contextDescription.callbacks.fpExecuteGpuJobs(&contextPrivate->contextDescription.callbacks, commandList);
+
+    // restore internal reactive
+    contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE] = internalReactive;
+
+    return FFX_OK;
+}
+
+// Schedule the pass that auto-generates the reactive and composition masks during a dispatch.
+//
+// contextPrivate - private FSR2 context providing the pipeline, resources and backend callbacks.
+// params         - dispatch description; colorOpaqueOnly, color and renderSize are read here.
+//
+// The job is only scheduled through fpScheduleGpuJob; this function does not execute it.
+// Constant buffers are taken from globalFsr2ConstantBuffers as they are at the time of the call.
+//
+// Always returns FFX_OK.
+static FfxErrorCode generateReactiveMaskInternal(FfxFsr2Context_Private* contextPrivate, const FfxFsr2DispatchDescription* params)
+{
+    // honour a pending pipeline refresh request before using any pipeline.
+    if (contextPrivate->refreshPipelineStates) {
+
+        createPipelineStates(contextPrivate);
+        contextPrivate->refreshPipelineStates = false;
+    }
+
+    FfxPipelineState* pipeline = &contextPrivate->pipelineTcrAutogenerate;
+
+    // one thread group covers an 8x8 region; round the group counts up.
+    const int32_t threadGroupWorkRegionDim = 8;
+    const int32_t dispatchSrcX = (params->renderSize.width + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+    const int32_t dispatchSrcY = (params->renderSize.height + (threadGroupWorkRegionDim - 1)) / threadGroupWorkRegionDim;
+
+    FfxComputeJobDescription jobDescriptor = {};
+    contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->colorOpaqueOnly, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY]);
+    contextPrivate->contextDescription.callbacks.fpRegisterResource(&contextPrivate->contextDescription.callbacks, &params->color, &contextPrivate->srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR]);
+
+    // the pass writes four UAVs: both generated masks and both previous-color resources.
+    jobDescriptor.uavs[0] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE];
+    jobDescriptor.uavs[1] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION];
+    jobDescriptor.uavs[2] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR];
+    jobDescriptor.uavs[3] = contextPrivate->uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR];
+
+    wcscpy_s(jobDescriptor.uavNames[0], pipeline->uavResourceBindings[0].name);
+    wcscpy_s(jobDescriptor.uavNames[1], pipeline->uavResourceBindings[1].name);
+    wcscpy_s(jobDescriptor.uavNames[2], pipeline->uavResourceBindings[2].name);
+    wcscpy_s(jobDescriptor.uavNames[3], pipeline->uavResourceBindings[3].name);
+
+    jobDescriptor.dimensions[0] = dispatchSrcX;
+    jobDescriptor.dimensions[1] = dispatchSrcY;
+    jobDescriptor.dimensions[2] = 1;
+    jobDescriptor.pipeline = *pipeline;
+
+    // resolve every SRV binding of the pipeline to the resource currently held in the context.
+    for (uint32_t currentShaderResourceViewIndex = 0; currentShaderResourceViewIndex < pipeline->srvCount; ++currentShaderResourceViewIndex) {
+
+        const uint32_t currentResourceId = pipeline->srvResourceBindings[currentShaderResourceViewIndex].resourceIdentifier;
+        const FfxResourceInternal currentResource = contextPrivate->srvResources[currentResourceId];
+        jobDescriptor.srvs[currentShaderResourceViewIndex] = currentResource;
+        wcscpy_s(jobDescriptor.srvNames[currentShaderResourceViewIndex], pipeline->srvResourceBindings[currentShaderResourceViewIndex].name);
+    }
+
+    // copy each constant buffer the pipeline binds, together with its name and slot.
+    for (uint32_t currentRootConstantIndex = 0; currentRootConstantIndex < pipeline->constCount; ++currentRootConstantIndex) {
+        wcscpy_s(jobDescriptor.cbNames[currentRootConstantIndex], pipeline->cbResourceBindings[currentRootConstantIndex].name);
+        jobDescriptor.cbs[currentRootConstantIndex] = globalFsr2ConstantBuffers[pipeline->cbResourceBindings[currentRootConstantIndex].resourceIdentifier];
+        jobDescriptor.cbSlotIndex[currentRootConstantIndex] = pipeline->cbResourceBindings[currentRootConstantIndex].slotIndex;
+    }
+
+    FfxGpuJobDescription dispatchJob = { FFX_GPU_JOB_COMPUTE };
+    dispatchJob.computeJobDescriptor = jobDescriptor;
+
+    contextPrivate->contextDescription.callbacks.fpScheduleGpuJob(&contextPrivate->contextDescription.callbacks, &dispatchJob);
+
+    return FFX_OK;
+}
diff --git a/thirdparty/amd-fsr2/ffx_fsr2.h b/thirdparty/amd-fsr2/ffx_fsr2.h
new file mode 100644
index 0000000000..7df3773ccc
--- /dev/null
+++ b/thirdparty/amd-fsr2/ffx_fsr2.h
@@ -0,0 +1,458 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+// @defgroup FSR2
+
+#pragma once
+
+// Include the interface for the backend of the FSR2 API.
+#include "ffx_fsr2_interface.h"
+
+/// FidelityFX Super Resolution 2 major version.
+///
+/// @ingroup FSR2
+#define FFX_FSR2_VERSION_MAJOR      (2)
+
+/// FidelityFX Super Resolution 2 minor version.
+///
+/// @ingroup FSR2
+#define FFX_FSR2_VERSION_MINOR      (2)
+
+/// FidelityFX Super Resolution 2 patch version.
+///
+/// @ingroup FSR2
+#define FFX_FSR2_VERSION_PATCH      (1)
+
+/// The size of the <c><i>FfxFsr2Context</i></c> storage, specified as a count of 32-bit values.
+///
+/// @ingroup FSR2
+#define FFX_FSR2_CONTEXT_SIZE       (16536)
+
+#if defined(__cplusplus)
+extern "C" {
+#endif // #if defined(__cplusplus)
+
+/// An enumeration of all the quality modes supported by FidelityFX Super
+/// Resolution 2 upscaling.
+///
+/// In order to provide a consistent user experience across multiple
+/// applications which implement FSR2, it is strongly recommended that the
+/// following preset scaling factors are made available through your
+/// application's user interface.
+///
+/// If your application does not expose the notion of preset scaling factors
+/// for upscaling algorithms (perhaps instead implementing a fixed ratio which
+/// is immutable) or implementing a more dynamic scaling scheme (such as
+/// dynamic resolution scaling), then there is no need to use these presets.
+///
+/// Please note that <c><i>FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE</i></c> is
+/// an optional mode which may introduce significant quality degradation in the
+/// final image. As such it is recommended that you evaluate the final results
+/// of using this scaling mode before deciding if you should include it in your
+/// application.
+///
+/// @ingroup FSR2
+typedef enum FfxFsr2QualityMode {
+
+    FFX_FSR2_QUALITY_MODE_QUALITY                       = 1,        ///< Perform upscaling with a per-dimension upscaling ratio of 1.5x.
+    FFX_FSR2_QUALITY_MODE_BALANCED                      = 2,        ///< Perform upscaling with a per-dimension upscaling ratio of 1.7x.
+    FFX_FSR2_QUALITY_MODE_PERFORMANCE                   = 3,        ///< Perform upscaling with a per-dimension upscaling ratio of 2.0x.
+    FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE             = 4         ///< Perform upscaling with a per-dimension upscaling ratio of 3.0x.
+} FfxFsr2QualityMode;
+
+/// An enumeration of bit flags used when creating a
+/// <c><i>FfxFsr2Context</i></c>. See <c><i>FfxFsr2ContextDescription.flags</i></c>.
+///
+/// @ingroup FSR2
+typedef enum FfxFsr2InitializationFlagBits {
+
+    FFX_FSR2_ENABLE_HIGH_DYNAMIC_RANGE                  = (1<<0),   ///< A bit indicating if the input color data provided is using a high-dynamic range.
+    FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS   = (1<<1),   ///< A bit indicating if the motion vectors are rendered at display resolution.
+    FFX_FSR2_ENABLE_MOTION_VECTORS_JITTER_CANCELLATION  = (1<<2),   ///< A bit indicating that the motion vectors have the jittering pattern applied to them.
+    FFX_FSR2_ENABLE_DEPTH_INVERTED                      = (1<<3),   ///< A bit indicating that the input depth buffer data provided is inverted [1..0].
+    FFX_FSR2_ENABLE_DEPTH_INFINITE                      = (1<<4),   ///< A bit indicating that the input depth buffer data provided is using an infinite far plane.
+    FFX_FSR2_ENABLE_AUTO_EXPOSURE                       = (1<<5),   ///< A bit indicating if automatic exposure should be applied to input color data.
+    FFX_FSR2_ENABLE_DYNAMIC_RESOLUTION                  = (1<<6),   ///< A bit indicating that the application uses dynamic resolution scaling.
+    FFX_FSR2_ENABLE_TEXTURE1D_USAGE                     = (1<<7),   ///< A bit indicating that the backend should use 1D textures.
+    FFX_FSR2_ENABLE_DEBUG_CHECKING                      = (1<<8),   ///< A bit indicating that the runtime should check some API values and report issues.
+} FfxFsr2InitializationFlagBits;
+
+/// A structure encapsulating the parameters required to initialize FidelityFX
+/// Super Resolution 2 upscaling.
+///
+/// @ingroup FSR2
+typedef struct FfxFsr2ContextDescription {
+
+    uint32_t                    flags;                              ///< A collection of <c><i>FfxFsr2InitializationFlagBits</i></c>.
+    FfxDimensions2D             maxRenderSize;                      ///< The maximum size that rendering will be performed at.
+    FfxDimensions2D             displaySize;                        ///< The size of the presentation resolution targeted by the upscaling process.
+    FfxFsr2Interface            callbacks;                          ///< A set of pointers to the backend implementation for FSR 2.0.
+    FfxDevice                   device;                             ///< The abstracted device which is passed to some callback functions.
+
+    FfxFsr2Message              fpMessage;                          ///< A pointer to a function that can receive messages from the runtime.
+} FfxFsr2ContextDescription;
+
+/// A structure encapsulating the parameters for dispatching the various passes
+/// of FidelityFX Super Resolution 2.
+///
+/// @ingroup FSR2
+typedef struct FfxFsr2DispatchDescription {
+
+    FfxCommandList              commandList;                        ///< The <c><i>FfxCommandList</i></c> to record FSR2 rendering commands into.
+    FfxResource                 color;                              ///< A <c><i>FfxResource</i></c> containing the color buffer for the current frame (at render resolution).
+    FfxResource                 depth;                              ///< A <c><i>FfxResource</i></c> containing 32bit depth values for the current frame (at render resolution).
+    FfxResource                 motionVectors;                      ///< A <c><i>FfxResource</i></c> containing 2-dimensional motion vectors (at render resolution if <c><i>FFX_FSR2_ENABLE_DISPLAY_RESOLUTION_MOTION_VECTORS</i></c> is not set).
+    FfxResource                 exposure;                           ///< An optional <c><i>FfxResource</i></c> containing a 1x1 exposure value.
+    FfxResource                 reactive;                           ///< An optional <c><i>FfxResource</i></c> containing alpha value of reactive objects in the scene.
+    FfxResource                 transparencyAndComposition;         ///< An optional <c><i>FfxResource</i></c> containing alpha value of special objects in the scene.
+    FfxResource                 output;                             ///< A <c><i>FfxResource</i></c> containing the output color buffer for the current frame (at presentation resolution).
+    FfxFloatCoords2D            jitterOffset;                       ///< The subpixel jitter offset applied to the camera.
+    FfxFloatCoords2D            motionVectorScale;                  ///< The scale factor to apply to motion vectors.
+    FfxDimensions2D             renderSize;                         ///< The resolution that was used for rendering the input resources.
+    bool                        enableSharpening;                   ///< Enable an additional sharpening pass.
+    float                       sharpness;                          ///< The sharpness value between 0 and 1, where 0 is no additional sharpness and 1 is maximum additional sharpness.
+    float                       frameTimeDelta;                     ///< The time elapsed since the last frame (expressed in milliseconds).
+    float                       preExposure;                        ///< The pre exposure value (must be > 0.0f)
+    bool                        reset;                              ///< A boolean value which when set to true, indicates the camera has moved discontinuously.
+    float                       cameraNear;                         ///< The distance to the near plane of the camera.
+    float                       cameraFar;                          ///< The distance to the far plane of the camera.
+    float                       cameraFovAngleVertical;             ///< The camera angle field of view in the vertical direction (expressed in radians).
+    float                       viewSpaceToMetersFactor;            ///< The scale factor to convert view space units to meters
+
+    // EXPERIMENTAL reactive mask generation parameters
+    bool                        enableAutoReactive;                 ///< A boolean value to indicate internal reactive autogeneration should be used
+    FfxResource                 colorOpaqueOnly;                    ///< A <c><i>FfxResource</i></c> containing the opaque only color buffer for the current frame (at render resolution).
+    float                       autoTcThreshold;                    ///< Cutoff value for TC
+    float                       autoTcScale;                        ///< A value to scale the transparency and composition mask
+    float                       autoReactiveScale;                  ///< A value to scale the reactive mask
+    float                       autoReactiveMax;                    ///< A value to clamp the reactive mask
+
+    // -- GODOT start --
+    float                       reprojectionMatrix[16];             ///< The matrix used for reprojecting pixels with invalid motion vectors by using the depth.
+	// -- GODOT end --
+
+} FfxFsr2DispatchDescription;
+
+/// A structure encapsulating the parameters for automatic generation of a reactive mask (see <c><i>ffxFsr2ContextGenerateReactiveMask</i></c>).
+///
+/// @ingroup FSR2
+typedef struct FfxFsr2GenerateReactiveDescription {
+
+    FfxCommandList              commandList;                        ///< The <c><i>FfxCommandList</i></c> to record FSR2 rendering commands into.
+    FfxResource                 colorOpaqueOnly;                    ///< A <c><i>FfxResource</i></c> containing the opaque only color buffer for the current frame (at render resolution).
+    FfxResource                 colorPreUpscale;                    ///< A <c><i>FfxResource</i></c> containing the opaque+translucent color buffer for the current frame (at render resolution).
+    FfxResource                 outReactive;                        ///< A <c><i>FfxResource</i></c> containing the surface to generate the reactive mask into.
+    FfxDimensions2D             renderSize;                         ///< The resolution that was used for rendering the input resources.
+    float                       scale;                              ///< A value to scale the output
+    float                       cutoffThreshold;                    ///< A threshold value to generate a binary reactive mask
+    float                       binaryValue;                        ///< A value to set for the binary reactive mask
+    uint32_t                    flags;                              ///< Flags to determine how to generate the reactive mask
+} FfxFsr2GenerateReactiveDescription;
+
+/// A structure encapsulating the FidelityFX Super Resolution 2 context.
+///
+/// This sets up an object which contains all persistent internal data and
+/// resources that are required by FSR2.
+///
+/// The <c><i>FfxFsr2Context</i></c> object should have a lifetime matching
+/// your use of FSR2. Before destroying the FSR2 context care should be taken
+/// to ensure the GPU is not accessing the resources created or used by FSR2.
+/// It is therefore recommended that the GPU is idle before destroying the
+/// FSR2 context.
+///
+/// @ingroup FSR2
+typedef struct FfxFsr2Context {
+
+    uint32_t                    data[FFX_FSR2_CONTEXT_SIZE];        ///< An opaque array of <c>uint32_t</c> values which contains the data for the context.
+} FfxFsr2Context;
+
+/// Create a FidelityFX Super Resolution 2 context from the parameters
+/// programmed to the <c><i>FfxFsr2ContextDescription</i></c> structure.
+///
+/// The context structure is the main object used to interact with the FSR2
+/// API, and is responsible for the management of the internal resources used
+/// by the FSR2 algorithm. When this API is called, multiple calls will be
+/// made via the pointers contained in the <c><i>callbacks</i></c> structure.
+/// These callbacks will attempt to retrieve the device capabilities, and
+/// create the internal resources, and pipelines required by FSR2's
+/// frame-to-frame function. Depending on the precise configuration used when
+/// creating the <c><i>FfxFsr2Context</i></c> a different set of resources and
+/// pipelines might be requested via the callback functions.
+///
+/// The flags included in the <c><i>flags</i></c> field of
+/// <c><i>FfxFsr2ContextDescription</i></c> must match the configuration of your
+/// application as well as the intended use of FSR2. It is important that these
+/// flags are set correctly (as well as a correctly programmed
+/// <c><i>FfxFsr2DispatchDescription</i></c>) to ensure correct operation. It is
+/// recommended to consult the overview documentation for further details on
+/// how FSR2 should be integrated into an application.
+///
+/// When the <c><i>FfxFsr2Context</i></c> is created, you should use the
+/// <c><i>ffxFsr2ContextDispatch</i></c> function each frame where FSR2
+/// upscaling should be applied. See the documentation of
+/// <c><i>ffxFsr2ContextDispatch</i></c> for more details.
+///
+/// The <c><i>FfxFsr2Context</i></c> should be destroyed when use of it is
+/// completed, typically when an application is unloaded or FSR2 upscaling is
+/// disabled by a user. To destroy the FSR2 context you should call
+/// <c><i>ffxFsr2ContextDestroy</i></c>.
+///
+/// @param [out] context                A pointer to a <c><i>FfxFsr2Context</i></c> structure to populate.
+/// @param [in]  contextDescription     A pointer to a <c><i>FfxFsr2ContextDescription</i></c> structure.
+///
+/// @retval
+/// FFX_OK                              The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER         The operation failed because either <c><i>context</i></c> or <c><i>contextDescription</i></c> was <c><i>NULL</i></c>.
+/// @retval
+/// FFX_ERROR_INCOMPLETE_INTERFACE      The operation failed because the <c><i>FfxFsr2ContextDescription.callbacks</i></c>  was not fully specified.
+/// @retval
+/// FFX_ERROR_BACKEND_API_ERROR         The operation failed because of an error returned from the backend.
+///
+/// @ingroup FSR2
+FFX_API FfxErrorCode ffxFsr2ContextCreate(FfxFsr2Context* context, const FfxFsr2ContextDescription* contextDescription);
+
+/// Dispatch the various passes that constitute FidelityFX Super Resolution 2.
+///
+/// FSR2 is a composite effect, meaning that it is composed of multiple
+/// constituent passes (implemented as one or more clears, copies and compute
+/// dispatches). The <c><i>ffxFsr2ContextDispatch</i></c> function is the
+/// function which (via the use of the functions contained in the
+/// <c><i>callbacks</i></c> field of the <c><i>FfxFsr2ContextDescription</i></c>
+/// structure) ultimately generates the sequence of graphics API calls required
+/// each frame.
+///
+/// As with the creation of the <c><i>FfxFsr2Context</i></c> correctly
+/// programming the <c><i>FfxFsr2DispatchDescription</i></c> is key to ensuring
+/// the correct operation of FSR2. It is particularly important to ensure that
+/// camera jitter is correctly applied to your application's projection matrix
+/// (or camera origin for raytraced applications). FSR2 provides the
+/// <c><i>ffxFsr2GetJitterPhaseCount</i></c> and
+/// <c><i>ffxFsr2GetJitterOffset</i></c> entry points to help applications
+/// correctly compute the camera jitter. Whatever jitter pattern is used by the
+/// application it should be correctly programmed to the
+/// <c><i>jitterOffset</i></c> field of the <c><i>dispatchDescription</i></c>
+/// structure. For more guidance on camera jitter please consult the
+/// documentation for <c><i>ffxFsr2GetJitterOffset</i></c> as well as the
+/// accompanying overview documentation for FSR2.
+///
+/// @param [in] context                 A pointer to a <c><i>FfxFsr2Context</i></c> structure.
+/// @param [in] dispatchDescription     A pointer to a <c><i>FfxFsr2DispatchDescription</i></c> structure.
+///
+/// @retval
+/// FFX_OK                              The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER         The operation failed because either <c><i>context</i></c> or <c><i>dispatchDescription</i></c> was <c><i>NULL</i></c>.
+/// @retval
+/// FFX_ERROR_OUT_OF_RANGE              The operation failed because <c><i>dispatchDescription.renderSize</i></c> was larger than the maximum render resolution.
+/// @retval
+/// FFX_ERROR_NULL_DEVICE               The operation failed because the device inside the context was <c><i>NULL</i></c>.
+/// @retval
+/// FFX_ERROR_BACKEND_API_ERROR         The operation failed because of an error returned from the backend.
+///
+/// @ingroup FSR2
+FFX_API FfxErrorCode ffxFsr2ContextDispatch(FfxFsr2Context* context, const FfxFsr2DispatchDescription* dispatchDescription);
+
+/// A helper function to generate a reactive mask from an opaque-only texture and one containing translucent objects.
+///
+/// @param [in] context                 A pointer to a <c><i>FfxFsr2Context</i></c> structure.
+/// @param [in] params                  A pointer to a <c><i>FfxFsr2GenerateReactiveDescription</i></c> structure.
+///
+/// @retval
+/// FFX_OK                              The operation completed successfully.
+///
+/// @ingroup FSR2
+FFX_API FfxErrorCode ffxFsr2ContextGenerateReactiveMask(FfxFsr2Context* context, const FfxFsr2GenerateReactiveDescription* params);
+
+/// Destroy the FidelityFX Super Resolution context.
+///
+/// @param [out] context                A pointer to a <c><i>FfxFsr2Context</i></c> structure to destroy.
+///
+/// @retval
+/// FFX_OK                              The operation completed successfully.
+/// @retval
+/// FFX_ERROR_CODE_NULL_POINTER         The operation failed because <c><i>context</i></c> was <c><i>NULL</i></c>.
+///
+/// @ingroup FSR2
+FFX_API FfxErrorCode ffxFsr2ContextDestroy(FfxFsr2Context* context);
+
+/// Get the upscale ratio from the quality mode.
+///
+/// The following table enumerates the mapping of the quality modes to
+/// per-dimension scaling ratios.
+///
+/// Quality preset                                        | Scale factor
+/// ----------------------------------------------------- | -------------
+/// <c><i>FFX_FSR2_QUALITY_MODE_QUALITY</i></c>           | 1.5x
+/// <c><i>FFX_FSR2_QUALITY_MODE_BALANCED</i></c>          | 1.7x
+/// <c><i>FFX_FSR2_QUALITY_MODE_PERFORMANCE</i></c>       | 2.0x
+/// <c><i>FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE</i></c> | 3.0x
+///
+/// Passing an invalid <c><i>qualityMode</i></c> will return 0.0f.
+///
+/// @param [in] qualityMode             The quality mode preset.
+///
+/// @returns
+/// The per-dimension upscaling ratio for
+/// <c><i>qualityMode</i></c> according to the table above.
+///
+/// @ingroup FSR2
+FFX_API float ffxFsr2GetUpscaleRatioFromQualityMode(FfxFsr2QualityMode qualityMode);
+
+/// A helper function to calculate the rendering resolution from a target
+/// resolution and desired quality level.
+///
+/// This function applies the scaling factor returned by
+/// <c><i>ffxFsr2GetUpscaleRatioFromQualityMode</i></c> to each dimension.
+///
+/// @param [out] renderWidth            A pointer to a <c>uint32_t</c> which will hold the calculated render resolution width.
+/// @param [out] renderHeight           A pointer to a <c>uint32_t</c> which will hold the calculated render resolution height.
+/// @param [in] displayWidth            The target display resolution width.
+/// @param [in] displayHeight           The target display resolution height.
+/// @param [in] qualityMode             The desired quality mode for FSR2 upscaling.
+///
+/// @retval
+/// FFX_OK                              The operation completed successfully.
+/// @retval
+/// FFX_ERROR_INVALID_POINTER           Either <c><i>renderWidth</i></c> or <c><i>renderHeight</i></c> was <c>NULL</c>.
+/// @retval
+/// FFX_ERROR_INVALID_ENUM              An invalid quality mode was specified.
+///
+/// @ingroup FSR2
+FFX_API FfxErrorCode ffxFsr2GetRenderResolutionFromQualityMode(
+    uint32_t* renderWidth,
+    uint32_t* renderHeight,
+    uint32_t displayWidth,
+    uint32_t displayHeight,
+    FfxFsr2QualityMode qualityMode);
+
+/// A helper function to calculate the jitter phase count from the render and
+/// display resolution widths.
+///
+/// For more detailed information about the application of camera jitter to
+/// your application's rendering please refer to the
+/// <c><i>ffxFsr2GetJitterOffset</i></c> function.
+///
+/// The table below shows the jitter phase count which this function
+/// would return for each of the quality presets.
+///
+/// Quality preset                                        | Scale factor  | Phase count
+/// ----------------------------------------------------- | ------------- | ---------------
+/// <c><i>FFX_FSR2_QUALITY_MODE_QUALITY</i></c>           | 1.5x          | 18
+/// <c><i>FFX_FSR2_QUALITY_MODE_BALANCED</i></c>          | 1.7x          | 23
+/// <c><i>FFX_FSR2_QUALITY_MODE_PERFORMANCE</i></c>       | 2.0x          | 32
+/// <c><i>FFX_FSR2_QUALITY_MODE_ULTRA_PERFORMANCE</i></c> | 3.0x          | 72
+/// Custom                                                | [1..n]x       | ceil(8*n^2)
+///
+/// @param [in] renderWidth             The render resolution width.
+/// @param [in] displayWidth            The display resolution width.
+///
+/// @returns
+/// The jitter phase count for the scaling factor between <c><i>renderWidth</i></c> and <c><i>displayWidth</i></c>.
+///
+/// @ingroup FSR2
+FFX_API int32_t ffxFsr2GetJitterPhaseCount(int32_t renderWidth, int32_t displayWidth);
+
+/// A helper function to calculate the subpixel jitter offset.
+///
+/// FSR2 relies on the application to apply sub-pixel jittering while rendering.
+/// This is typically included in the projection matrix of the camera. To make
+/// the application of camera jitter simple, the FSR2 API provides a small set
+/// of utility functions which compute the sub-pixel jitter offset for a
+/// particular frame within a sequence of separate jitter offsets. To begin, the
+/// index within the jitter phase must be computed. To calculate the
+/// sequence's length, you can call the <c><i>ffxFsr2GetJitterPhaseCount</i></c>
+/// function. The index should be a value which is incremented each frame modulo
+/// the length of the sequence computed by <c><i>ffxFsr2GetJitterPhaseCount</i></c>.
+/// The index within the jitter phase is passed to
+/// <c><i>ffxFsr2GetJitterOffset</i></c> via the <c><i>index</i></c> parameter.
+///
+/// This function uses a Halton(2,3) sequence to compute the jitter offset.
+/// The ultimate index used for the sequence is <c><i>index</i></c> %
+/// <c><i>phaseCount</i></c>.
+///
+/// It is important to understand that the values returned from the
+/// <c><i>ffxFsr2GetJitterOffset</i></c> function are in unit pixel space, and
+/// in order to composite this correctly into a projection matrix we must
+/// convert them into projection offsets. This is done as per the pseudo code
+/// listing which is shown below.
+///
+///     const int32_t jitterPhaseCount = ffxFsr2GetJitterPhaseCount(renderWidth, displayWidth);
+///
+///     float jitterX = 0;
+///     float jitterY = 0;
+///     ffxFsr2GetJitterOffset(&jitterX, &jitterY, index, jitterPhaseCount);
+///
+///     const float projectionJitterX = 2.0f * jitterX / (float)renderWidth;
+///     const float projectionJitterY = -2.0f * jitterY / (float)renderHeight;
+///     const Matrix4 jitterTranslationMatrix = translateMatrix(Matrix3::identity, Vector3(projectionJitterX, projectionJitterY, 0));
+///     const Matrix4 jitteredProjectionMatrix = jitterTranslationMatrix * projectionMatrix;
+///
+/// Jitter should be applied to all rendering. This includes opaque, alpha
+/// transparent, and raytraced objects. For rasterized objects, the sub-pixel
+/// jittering values calculated by the <c><i>ffxFsr2GetJitterOffset</i></c>
+/// function can be applied to the camera projection matrix which is ultimately
+/// used to perform transformations during vertex shading. For raytraced
+/// rendering, the sub-pixel jitter should be applied to the ray's origin,
+/// often the camera's position.
+///
+/// Whether you elect to use the <c><i>ffxFsr2GetJitterOffset</i></c> function
+/// or your own sequence generator, you must program the
+/// <c><i>jitterOffset</i></c> field of the
+/// <c><i>FfxFsr2DispatchDescription</i></c> structure in order to inform FSR2
+/// of the jitter offset that has been applied in order to render each frame.
+///
+/// If not using the recommended <c><i>ffxFsr2GetJitterOffset</i></c> function,
+/// care should be taken that your jitter sequence never generates a null vector;
+/// that is, a value of 0 in both the X and Y dimensions.
+///
+/// @param [out] outX                   A pointer to a <c>float</c> which will contain the subpixel jitter offset for the x dimension.
+/// @param [out] outY                   A pointer to a <c>float</c> which will contain the subpixel jitter offset for the y dimension.
+/// @param [in] index                   The index within the jitter sequence.
+/// @param [in] phaseCount              The length of jitter phase. See <c><i>ffxFsr2GetJitterPhaseCount</i></c>.
+///
+/// @retval
+/// FFX_OK                              The operation completed successfully.
+/// @retval
+/// FFX_ERROR_INVALID_POINTER           Either <c><i>outX</i></c> or <c><i>outY</i></c> was <c>NULL</c>.
+/// @retval
+/// FFX_ERROR_INVALID_ARGUMENT          Argument <c><i>phaseCount</i></c> must be greater than 0.
+///
+/// @ingroup FSR2
+FFX_API FfxErrorCode ffxFsr2GetJitterOffset(float* outX, float* outY, int32_t index, int32_t phaseCount);
+
+/// A helper function to check if a resource is
+/// <c><i>FFX_FSR2_RESOURCE_IDENTIFIER_NULL</i></c>.
+///
+/// @param [in] resource                A <c><i>FfxResource</i></c>.
+///
+/// @returns
+/// true                                The <c><i>resource</i></c> was <c><i>FFX_FSR2_RESOURCE_IDENTIFIER_NULL</i></c>.
+/// @returns
+/// false                               The <c><i>resource</i></c> was not <c><i>FFX_FSR2_RESOURCE_IDENTIFIER_NULL</i></c>.
+///
+/// @ingroup FSR2
+FFX_API bool ffxFsr2ResourceIsNull(FfxResource resource);
+
+#if defined(__cplusplus)
+}
+#endif // #if defined(__cplusplus)
diff --git a/thirdparty/amd-fsr2/ffx_fsr2_interface.h b/thirdparty/amd-fsr2/ffx_fsr2_interface.h
new file mode 100644
index 0000000000..b6be9760a7
--- /dev/null
+++ b/thirdparty/amd-fsr2/ffx_fsr2_interface.h
@@ -0,0 +1,395 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "ffx_assert.h"
+#include "ffx_types.h"
+#include "ffx_error.h"
+
+// Include the FSR2 resources defined in the HLSL code. This is shared here to avoid getting out of sync.
+#define FFX_CPU
+#include "shaders/ffx_fsr2_resources.h"
+#include "shaders/ffx_fsr2_common.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif // #if defined(__cplusplus)
+
+FFX_FORWARD_DECLARE(FfxFsr2Interface);
+
+/// An enumeration of all the passes which constitute the FSR2 algorithm.
+///
+/// FSR2 is implemented as a composite of several compute passes each
+/// computing a key part of the final result. Each call to the 
+/// <c><i>FfxFsr2ScheduleGpuJobFunc</i></c> callback function will
+/// correspond to a single pass included in <c><i>FfxFsr2Pass</i></c>. For a
+/// more comprehensive description of each pass, please refer to the FSR2
+/// reference documentation.
+///
+/// Please note in some cases e.g.: <c><i>FFX_FSR2_PASS_ACCUMULATE</i></c>
+/// and <c><i>FFX_FSR2_PASS_ACCUMULATE_SHARPEN</i></c> either one pass or the
+/// other will be used (they are mutually exclusive). The choice of which will
+/// depend on the way the <c><i>FfxFsr2Context</i></c> is created and the
+/// precise contents of <c><i>FfxFsr2DispatchDescription</i></c> each time a call
+/// is made to <c><i>ffxFsr2ContextDispatch</i></c>.
+/// 
+/// @ingroup FSR2
+typedef enum FfxFsr2Pass {
+
+    FFX_FSR2_PASS_DEPTH_CLIP = 0,                                       ///< A pass which performs depth clipping.
+    FFX_FSR2_PASS_RECONSTRUCT_PREVIOUS_DEPTH = 1,                       ///< A pass which performs reconstruction of previous frame's depth.
+    FFX_FSR2_PASS_LOCK = 2,                                             ///< A pass which calculates pixel locks.
+    FFX_FSR2_PASS_ACCUMULATE = 3,                                       ///< A pass which performs upscaling.
+    FFX_FSR2_PASS_ACCUMULATE_SHARPEN = 4,                               ///< A pass which performs upscaling when sharpening is used.
+    FFX_FSR2_PASS_RCAS = 5,                                             ///< A pass which performs sharpening.
+    FFX_FSR2_PASS_COMPUTE_LUMINANCE_PYRAMID = 6,                        ///< A pass which generates the luminance mipmap chain for the current frame.
+    FFX_FSR2_PASS_GENERATE_REACTIVE = 7,                                ///< An optional pass to generate a reactive mask.
+    FFX_FSR2_PASS_TCR_AUTOGENERATE = 8,                                 ///< An optional pass to generate a texture-and-composition and reactive masks.
+
+    FFX_FSR2_PASS_COUNT                                                 ///< The number of passes performed by FSR2.
+} FfxFsr2Pass;
+
+typedef enum FfxFsr2MsgType {
+    FFX_FSR2_MESSAGE_TYPE_ERROR = 0,                                    ///< The message reports an error.
+    FFX_FSR2_MESSAGE_TYPE_WARNING = 1,                                  ///< The message reports a warning.
+    FFX_FSR2_MESSAGE_TYPE_COUNT                                         ///< The number of message types.
+} FfxFsr2MsgType;
+
+/// Create and initialize the backend context.
+///
+/// The callback function sets up the backend context for rendering.
+/// It will create or reference the device and create required internal data structures.
+///
+/// @param [in] backendInterface                    A pointer to the backend interface.
+/// @param [in] device                              The FfxDevice obtained by ffxGetDevice(DX12/VK/...).
+///
+/// @retval
+/// FFX_OK                                          The operation completed successfully.
+/// @retval
+/// Anything else                                   The operation failed.
+///
+/// @ingroup FSR2
+typedef FfxErrorCode (*FfxFsr2CreateBackendContextFunc)(
+    FfxFsr2Interface* backendInterface,
+    FfxDevice device);
+
+/// Get a list of capabilities of the device.
+///
+/// When creating an <c><i>FfxFsr2Context</i></c> it is desirable for the FSR2
+/// core implementation to be aware of certain characteristics of the platform
+/// that is being targeted. This is because some optimizations which FSR2
+/// attempts to perform are more effective on certain classes of hardware than
+/// others, or are not supported by older hardware. In order to avoid cases
+/// where optimizations actually have the effect of decreasing performance, or
+/// reduce the breadth of support provided by FSR2, FSR2 queries the
+/// capabilities of the device to make such decisions.
+///
+/// For target platforms with fixed hardware support you need not implement
+/// this callback function by querying the device, but instead may hardcode
+/// what features are available on the platform.
+///
+/// @param [in] backendInterface                    A pointer to the backend interface.
+/// @param [out] outDeviceCapabilities              The device capabilities structure to fill out.
+/// @param [in] device                              The device to query for capabilities.
+///
+/// @retval
+/// FFX_OK                                          The operation completed successfully.
+/// @retval
+/// Anything else                                   The operation failed.
+/// 
+/// @ingroup FSR2
+typedef FfxErrorCode(*FfxFsr2GetDeviceCapabilitiesFunc)(
+    FfxFsr2Interface* backendInterface,
+    FfxDeviceCapabilities* outDeviceCapabilities,
+    FfxDevice device);
+
+/// Destroy the backend context and dereference the device.
+///
+/// This function is called when the <c><i>FfxFsr2Context</i></c> is destroyed.
+///
+/// @param [in] backendInterface                    A pointer to the backend interface.
+///
+/// @retval
+/// FFX_OK                                          The operation completed successfully.
+/// @retval
+/// Anything else                                   The operation failed.
+///
+/// @ingroup FSR2
+typedef FfxErrorCode(*FfxFsr2DestroyBackendContextFunc)(
+    FfxFsr2Interface* backendInterface);
+
+/// Create a resource.
+///
+/// This callback is intended for the backend to create internal resources.
+///
+/// Please note: It is also possible that the creation of resources might
+/// itself cause additional resources to be created by simply calling the
+/// <c><i>FfxFsr2CreateResourceFunc</i></c> function pointer again. This is
+/// useful when handling the initial creation of resources which must be
+/// initialized. The flow in such a case would be an initial call to create the
+/// CPU-side resource, another to create the GPU-side resource, and then a call
+/// to schedule a copy render job to move the data between the two. Typically
+/// this type of function call flow is only seen during the creation of an
+/// <c><i>FfxFsr2Context</i></c>.
+///
+/// @param [in] backendInterface                    A pointer to the backend interface.
+/// @param [in] createResourceDescription           A pointer to a <c><i>FfxCreateResourceDescription</i></c>.
+/// @param [out] outResource                        A pointer to a <c><i>FfxResource</i></c> object.
+///
+/// @retval
+/// FFX_OK                                          The operation completed successfully.
+/// @retval
+/// Anything else                                   The operation failed.
+/// 
+/// @ingroup FSR2
+typedef FfxErrorCode (*FfxFsr2CreateResourceFunc)(
+    FfxFsr2Interface* backendInterface,
+    const FfxCreateResourceDescription* createResourceDescription,
+    FfxResourceInternal* outResource);
+
+/// Register a resource in the backend for the current frame.
+///
+/// Since FSR2 and the backend are not aware how many different
+/// resources will get passed to FSR2 over time, it's not safe 
+/// to register all resources simultaneously in the backend.
+/// Also passed resources may not be valid after the dispatch call.
+/// As a result it's safest to register them as FfxResourceInternal 
+/// and clear them at the end of the dispatch call.
+///
+/// @param [in] backendInterface                    A pointer to the backend interface.
+/// @param [in] inResource                          A pointer to a <c><i>FfxResource</i></c>.
+/// @param [out] outResource                        A pointer to a <c><i>FfxResourceInternal</i></c> object.
+///
+/// @retval
+/// FFX_OK                                          The operation completed successfully.
+/// @retval
+/// Anything else                                   The operation failed.
+/// 
+/// @ingroup FSR2
+typedef FfxErrorCode(*FfxFsr2RegisterResourceFunc)(
+    FfxFsr2Interface* backendInterface,
+    const FfxResource* inResource,
+    FfxResourceInternal* outResource);
+
+/// Unregister all temporary FfxResourceInternal from the backend.
+///
+/// Unregister FfxResourceInternal referencing resources passed to 
+/// a function as a parameter.
+///
+/// @param [in] backendInterface                    A pointer to the backend interface.
+///
+/// @retval
+/// FFX_OK                                          The operation completed successfully.
+/// @retval
+/// Anything else                                   The operation failed.
+/// 
+/// @ingroup FSR2
+typedef FfxErrorCode(*FfxFsr2UnregisterResourcesFunc)(
+    FfxFsr2Interface* backendInterface);
+
+/// Retrieve a <c><i>FfxResourceDescription</i></c> matching a
+/// <c><i>FfxResourceInternal</i></c> structure.
+///
+/// @param [in] backendInterface                    A pointer to the backend interface.
+/// @param [in] resource                            The <c><i>FfxResourceInternal</i></c> object to describe.
+///
+/// @returns
+/// A description of the resource.
+///
+/// @ingroup FSR2
+typedef FfxResourceDescription (*FfxFsr2GetResourceDescriptionFunc)(
+    FfxFsr2Interface* backendInterface,
+    FfxResourceInternal resource);
+
+/// Destroy a resource.
+///
+/// This callback is intended for the backend to release an internal resource.
+///
+/// @param [in] backendInterface                    A pointer to the backend interface.
+/// @param [in] resource                            The <c><i>FfxResourceInternal</i></c> object to destroy.
+/// 
+/// @retval
+/// FFX_OK                                          The operation completed successfully.
+/// @retval
+/// Anything else                                   The operation failed.
+/// 
+/// @ingroup FSR2
+typedef FfxErrorCode (*FfxFsr2DestroyResourceFunc)(
+    FfxFsr2Interface* backendInterface,
+    FfxResourceInternal resource);
+
+/// Create a render pipeline.
+///
+/// A rendering pipeline contains the shader as well as resource bindpoints
+/// and samplers.
+/// 
+/// @param [in] backendInterface                    A pointer to the backend interface.
+/// @param [in] pass                                The identifier for the pass.
+/// @param [in] pipelineDescription                 A pointer to a <c><i>FfxPipelineDescription</i></c> describing the pipeline to be created.
+/// @param [out] outPipeline                        A pointer to a <c><i>FfxPipelineState</i></c> structure which should be populated.
+/// 
+/// @retval
+/// FFX_OK                                          The operation completed successfully.
+/// @retval
+/// Anything else                                   The operation failed.
+/// 
+/// @ingroup FSR2
+typedef FfxErrorCode (*FfxFsr2CreatePipelineFunc)(
+    FfxFsr2Interface* backendInterface,
+    FfxFsr2Pass pass,
+    const FfxPipelineDescription* pipelineDescription,
+    FfxPipelineState* outPipeline);
+
+/// Destroy a render pipeline.
+///
+/// @param [in] backendInterface                    A pointer to the backend interface.
+/// @param [out] pipeline                           A pointer to a <c><i>FfxPipelineState</i></c> structure which should be released.
+/// 
+/// @retval
+/// FFX_OK                                          The operation completed successfully.
+/// @retval
+/// Anything else                                   The operation failed.
+/// 
+/// @ingroup FSR2
+typedef FfxErrorCode (*FfxFsr2DestroyPipelineFunc)(
+    FfxFsr2Interface* backendInterface,
+    FfxPipelineState* pipeline);
+
+/// Schedule a render job to be executed on the next call of
+/// <c><i>FfxFsr2ExecuteGpuJobsFunc</i></c>.
+///
+/// Render jobs can perform one of three different tasks: clear, copy or
+/// compute dispatches.
+///
+/// @param [in] backendInterface                    A pointer to the backend interface.
+/// @param [in] job                                 A pointer to a <c><i>FfxGpuJobDescription</i></c> structure.
+/// 
+/// @retval
+/// FFX_OK                                          The operation completed successfully.
+/// @retval
+/// Anything else                                   The operation failed.
+/// 
+/// @ingroup FSR2
+typedef FfxErrorCode (*FfxFsr2ScheduleGpuJobFunc)(
+    FfxFsr2Interface* backendInterface,
+    const FfxGpuJobDescription* job);
+
+/// Execute scheduled render jobs on the <c><i>commandList</i></c> provided.
+/// 
+/// The recording of the graphics API commands should take place in this
+/// callback function, the render jobs which were previously enqueued (via
+/// callbacks made to <c><i>FfxFsr2ScheduleGpuJobFunc</i></c>) should be
+/// processed in the order they were received. Advanced users might choose to
+/// reorder the rendering jobs, but should do so with care to respect the
+/// resource dependencies.
+/// 
+/// Depending on the precise contents of <c><i>FfxFsr2DispatchDescription</i></c> a
+/// different number of render jobs might have previously been enqueued (for
+/// example if sharpening is toggled on and off).
+/// 
+/// @param [in] backendInterface                    A pointer to the backend interface.
+/// @param [in] commandList                         A pointer to a <c><i>FfxCommandList</i></c> structure.
+/// 
+/// @retval
+/// FFX_OK                                          The operation completed successfully.
+/// @retval
+/// Anything else                                   The operation failed.
+/// 
+/// @ingroup FSR2
+typedef FfxErrorCode (*FfxFsr2ExecuteGpuJobsFunc)(
+    FfxFsr2Interface* backendInterface,
+    FfxCommandList commandList);
+
+/// Pass a string message
+///
+/// Used for debug messages.
+///
+/// @param [in] type                       The type of message.
+/// @param [in] message                    A string message to pass.
+///
+///
+/// @ingroup FSR2
+typedef void(*FfxFsr2Message)(
+    FfxFsr2MsgType type,
+    const wchar_t* message);
+
+/// A structure encapsulating the interface between the core implementation of
+/// the FSR2 algorithm and any graphics API that it should ultimately call.
+/// 
+/// This set of functions serves as an abstraction layer between FSR2 and the
+/// API used to implement it. While FSR2 ships with backends for DirectX12 and
+/// Vulkan, it is possible to implement your own backend for other platforms or
+/// which sits on top of your engine's own abstraction layer. For details on the
+/// expectations of what each function should do you should refer to the
+/// description of the following function pointer types:
+/// 
+///     <c><i>FfxFsr2CreateBackendContextFunc</i></c>
+///     <c><i>FfxFsr2GetDeviceCapabilitiesFunc</i></c>
+///     <c><i>FfxFsr2DestroyBackendContextFunc</i></c>
+///     <c><i>FfxFsr2CreateResourceFunc</i></c>
+///     <c><i>FfxFsr2GetResourceDescriptionFunc</i></c>
+///     <c><i>FfxFsr2DestroyResourceFunc</i></c>
+///     <c><i>FfxFsr2CreatePipelineFunc</i></c>
+///     <c><i>FfxFsr2DestroyPipelineFunc</i></c>
+///     <c><i>FfxFsr2ScheduleGpuJobFunc</i></c>
+///     <c><i>FfxFsr2ExecuteGpuJobsFunc</i></c>
+///
+/// Depending on the graphics API that is abstracted by the backend, it may be
+/// required that the backend is to some extent stateful. To ensure that
+/// applications retain full control to manage the memory used by FSR2, the
+/// <c><i>scratchBuffer</i></c> and <c><i>scratchBufferSize</i></c> fields are
+/// provided. A backend should provide a means of specifying how much scratch
+/// memory is required for its internal implementation (e.g: via a function
+/// or constant value). The application is then responsible for allocating that
+/// memory and providing it when setting up the FSR2 backend. Backends provided
+/// with FSR2 do not perform dynamic memory allocations, and instead
+/// suballocate all memory from the scratch buffers provided.
+///
+/// The <c><i>scratchBuffer</i></c> and <c><i>scratchBufferSize</i></c> fields
+/// should be populated according to the requirements of each backend. For
+/// example, if using the DirectX 12 backend you should call the 
+/// <c><i>ffxFsr2GetScratchMemorySizeDX12</i></c> function. It is not required
+/// that custom backend implementations use a scratch buffer.
+///
+/// @ingroup FSR2
+typedef struct FfxFsr2Interface {
+
+    FfxFsr2CreateBackendContextFunc         fpCreateBackendContext;         ///< A callback function to create and initialize the backend context.
+    FfxFsr2GetDeviceCapabilitiesFunc        fpGetDeviceCapabilities;        ///< A callback function to query device capabilities.
+    FfxFsr2DestroyBackendContextFunc        fpDestroyBackendContext;        ///< A callback function to destroy the backend context. This also dereferences the device.
+    FfxFsr2CreateResourceFunc               fpCreateResource;               ///< A callback function to create a resource.
+    FfxFsr2RegisterResourceFunc             fpRegisterResource;             ///< A callback function to register an external resource.
+    FfxFsr2UnregisterResourcesFunc          fpUnregisterResources;          ///< A callback function to unregister external resources.
+    FfxFsr2GetResourceDescriptionFunc       fpGetResourceDescription;       ///< A callback function to retrieve a resource description.
+    FfxFsr2DestroyResourceFunc              fpDestroyResource;              ///< A callback function to destroy a resource.
+    FfxFsr2CreatePipelineFunc               fpCreatePipeline;               ///< A callback function to create a render or compute pipeline.
+    FfxFsr2DestroyPipelineFunc              fpDestroyPipeline;              ///< A callback function to destroy a render or compute pipeline.
+    FfxFsr2ScheduleGpuJobFunc               fpScheduleGpuJob;               ///< A callback function to schedule a render job.
+    FfxFsr2ExecuteGpuJobsFunc               fpExecuteGpuJobs;               ///< A callback function to execute all queued render jobs.
+
+    void*                                   scratchBuffer;                  ///< A preallocated buffer for memory utilized internally by the backend.
+    size_t                                  scratchBufferSize;              ///< Size of the buffer pointed to by <c><i>scratchBuffer</i></c>.
+} FfxFsr2Interface;
+
+#if defined(__cplusplus)
+}
+#endif // #if defined(__cplusplus)
diff --git a/thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h b/thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h
new file mode 100644
index 0000000000..5fdbd0cdcd
--- /dev/null
+++ b/thirdparty/amd-fsr2/ffx_fsr2_maximum_bias.h
@@ -0,0 +1,46 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// @internal
+
+#pragma once
+
+static const int FFX_FSR2_MAXIMUM_BIAS_TEXTURE_WIDTH = 16;
+static const int FFX_FSR2_MAXIMUM_BIAS_TEXTURE_HEIGHT = 16;
+static const float ffxFsr2MaximumBias[] = {
+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.876f,	1.809f,	1.772f,	1.753f,	1.748f,
+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.869f,	1.801f,	1.764f,	1.745f,	1.739f,
+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.976f,	1.841f,	1.774f,	1.737f,	1.716f,	1.71f,
+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.914f,	1.784f,	1.716f,	1.673f,	1.649f,	1.641f,
+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.793f,	1.676f,	1.604f,	1.562f,	1.54f,	1.533f,
+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.802f,	1.619f,	1.536f,	1.492f,	1.467f,	1.454f,	1.449f,
+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.812f,	1.575f,	1.496f,	1.456f,	1.432f,	1.416f,	1.408f,	1.405f,
+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.555f,	1.479f,	1.438f,	1.413f,	1.398f,	1.387f,	1.381f,	1.379f,
+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.812f,	1.555f,	1.474f,	1.43f,	1.404f,	1.387f,	1.376f,	1.368f,	1.363f,	1.362f,
+	2.0f,	2.0f,	2.0f,	2.0f,	2.0f,	1.802f,	1.575f,	1.479f,	1.43f,	1.401f,	1.382f,	1.369f,	1.36f,	1.354f,	1.351f,	1.35f,
+	2.0f,	2.0f,	1.976f,	1.914f,	1.793f,	1.619f,	1.496f,	1.438f,	1.404f,	1.382f,	1.367f,	1.357f,	1.349f,	1.344f,	1.341f,	1.34f,
+	1.876f,	1.869f,	1.841f,	1.784f,	1.676f,	1.536f,	1.456f,	1.413f,	1.387f,	1.369f,	1.357f,	1.347f,	1.341f,	1.336f,	1.333f,	1.332f,
+	1.809f,	1.801f,	1.774f,	1.716f,	1.604f,	1.492f,	1.432f,	1.398f,	1.376f,	1.36f,	1.349f,	1.341f,	1.335f,	1.33f,	1.328f,	1.327f,
+	1.772f,	1.764f,	1.737f,	1.673f,	1.562f,	1.467f,	1.416f,	1.387f,	1.368f,	1.354f,	1.344f,	1.336f,	1.33f,	1.326f,	1.323f,	1.323f,
+	1.753f,	1.745f,	1.716f,	1.649f,	1.54f,	1.454f,	1.408f,	1.381f,	1.363f,	1.351f,	1.341f,	1.333f,	1.328f,	1.323f,	1.321f,	1.32f,
+	1.748f,	1.739f,	1.71f,	1.641f,	1.533f,	1.449f,	1.405f,	1.379f,	1.362f,	1.35f,	1.34f,	1.332f,	1.327f,	1.323f,	1.32f,	1.319f,
+
+};
diff --git a/thirdparty/amd-fsr2/ffx_fsr2_private.h b/thirdparty/amd-fsr2/ffx_fsr2_private.h
new file mode 100644
index 0000000000..0face069b6
--- /dev/null
+++ b/thirdparty/amd-fsr2/ffx_fsr2_private.h
@@ -0,0 +1,86 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+// Constants for FSR2 DX12 dispatches. Must be kept in sync with cbFSR2 in ffx_fsr2_callbacks_hlsl.h
+typedef struct Fsr2Constants {
+
+    int32_t                     renderSize[2];
+    int32_t                     maxRenderSize[2];
+    int32_t                     displaySize[2];
+    int32_t                     inputColorResourceDimensions[2];
+    int32_t                     lumaMipDimensions[2];
+    int32_t                     lumaMipLevelToUse;
+    int32_t                     frameIndex;
+    
+    float                       deviceToViewDepth[4];
+    float                       jitterOffset[2];
+    float                       motionVectorScale[2];
+    float                       downscaleFactor[2];
+    float                       motionVectorJitterCancellation[2];
+    float                       preExposure;
+    float                       previousFramePreExposure;
+    float                       tanHalfFOV;
+    float                       jitterPhaseCount;
+    float                       deltaTime;
+    float                       dynamicResChangeFactor;
+    float                       viewSpaceToMetersFactor;
+
+	// -- GODOT start --
+    float                       pad;
+    float                       reprojectionMatrix[16];
+	// -- GODOT end --
+} Fsr2Constants;
+
+struct FfxFsr2ContextDescription;
+struct FfxDeviceCapabilities;
+struct FfxPipelineState;
+struct FfxResource;
+
+// FfxFsr2Context_Private
+// The private implementation of the FSR2 context.
+typedef struct FfxFsr2Context_Private {
+
+    FfxFsr2ContextDescription   contextDescription;
+    Fsr2Constants               constants;
+    FfxDevice                   device;
+    FfxDeviceCapabilities       deviceCapabilities;
+    FfxPipelineState            pipelineDepthClip;
+    FfxPipelineState            pipelineReconstructPreviousDepth;
+    FfxPipelineState            pipelineLock;
+    FfxPipelineState            pipelineAccumulate;
+    FfxPipelineState            pipelineAccumulateSharpen;
+    FfxPipelineState            pipelineRCAS;
+    FfxPipelineState            pipelineComputeLuminancePyramid;
+    FfxPipelineState            pipelineGenerateReactive;
+    FfxPipelineState            pipelineTcrAutogenerate;
+
+    // 2 arrays of resources, as e.g. FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS will use different resources when bound as SRV vs when bound as UAV
+    FfxResourceInternal         srvResources[FFX_FSR2_RESOURCE_IDENTIFIER_COUNT];
+    FfxResourceInternal         uavResources[FFX_FSR2_RESOURCE_IDENTIFIER_COUNT];
+
+    bool                        firstExecution;
+    bool                        refreshPipelineStates;
+    uint32_t                    resourceFrameIndex;
+    float                       previousJitterOffset[2];
+    int32_t                     jitterPhaseCountRemaining;
+} FfxFsr2Context_Private;
diff --git a/thirdparty/amd-fsr2/ffx_types.h b/thirdparty/amd-fsr2/ffx_types.h
new file mode 100644
index 0000000000..8b65219b50
--- /dev/null
+++ b/thirdparty/amd-fsr2/ffx_types.h
@@ -0,0 +1,367 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include <stdint.h>
+// -- GODOT start --
+#include <stdlib.h>
+// -- GODOT end --
+
+#if defined (FFX_GCC)
+/// FidelityFX exported functions
+#define FFX_API
+#else
+/// FidelityFX exported functions
+#define FFX_API __declspec(dllexport)
+#endif // #if defined (FFX_GCC)
+
+/// Maximum supported number of simultaneously bound SRVs.
+#define FFX_MAX_NUM_SRVS            16
+
+/// Maximum supported number of simultaneously bound UAVs.
+#define FFX_MAX_NUM_UAVS            8
+
+/// Maximum number of constant buffers bound.
+#define FFX_MAX_NUM_CONST_BUFFERS   2
+
+/// Maximum size of bound constant buffers.
+#define FFX_MAX_CONST_SIZE          64
+
+/// Off by default warnings
+#if defined(_MSC_VER)
+#pragma warning(disable : 4365 4710 4820 5039)
+#elif defined(__clang__)
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#pragma clang diagnostic ignored "-Wmissing-field-initializers"
+#pragma clang diagnostic ignored "-Wsign-compare"
+#pragma clang diagnostic ignored "-Wunused-function"
+#pragma clang diagnostic ignored "-Wignored-qualifiers"
+#elif defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wunused-function"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif  // #ifdef __cplusplus
+
+/// An enumeration of surface formats.
+typedef enum FfxSurfaceFormat {
+
+    FFX_SURFACE_FORMAT_UNKNOWN,                     ///< Unknown format
+    FFX_SURFACE_FORMAT_R32G32B32A32_TYPELESS,       ///< 32 bit per channel, 4 channel typeless format
+    FFX_SURFACE_FORMAT_R32G32B32A32_FLOAT,          ///< 32 bit per channel, 4 channel float format
+    FFX_SURFACE_FORMAT_R16G16B16A16_FLOAT,          ///< 16 bit per channel, 4 channel float format
+    FFX_SURFACE_FORMAT_R16G16B16A16_UNORM,          ///< 16 bit per channel, 4 channel unsigned normalized format
+    FFX_SURFACE_FORMAT_R32G32_FLOAT,                ///< 32 bit per channel, 2 channel float format
+    FFX_SURFACE_FORMAT_R32_UINT,                    ///< 32 bit per channel, 1 channel unsigned int format
+    FFX_SURFACE_FORMAT_R8G8B8A8_TYPELESS,           ///<  8 bit per channel, 4 channel typeless format
+    FFX_SURFACE_FORMAT_R8G8B8A8_UNORM,              ///<  8 bit per channel, 4 channel unsigned normalized format
+    FFX_SURFACE_FORMAT_R11G11B10_FLOAT,             ///< 32 bit 3 channel float format
+    FFX_SURFACE_FORMAT_R16G16_FLOAT,                ///< 16 bit per channel, 2 channel float format
+    FFX_SURFACE_FORMAT_R16G16_UINT,                 ///< 16 bit per channel, 2 channel unsigned int format
+    FFX_SURFACE_FORMAT_R16_FLOAT,                   ///< 16 bit per channel, 1 channel float format
+    FFX_SURFACE_FORMAT_R16_UINT,                    ///< 16 bit per channel, 1 channel unsigned int format
+    FFX_SURFACE_FORMAT_R16_UNORM,                   ///< 16 bit per channel, 1 channel unsigned normalized format
+    FFX_SURFACE_FORMAT_R16_SNORM,                   ///< 16 bit per channel, 1 channel signed normalized format
+    FFX_SURFACE_FORMAT_R8_UNORM,                    ///<  8 bit per channel, 1 channel unsigned normalized format
+    FFX_SURFACE_FORMAT_R8_UINT,                     ///<  8 bit per channel, 1 channel unsigned int format
+    FFX_SURFACE_FORMAT_R8G8_UNORM,                  ///<  8 bit per channel, 2 channel unsigned normalized format
+    FFX_SURFACE_FORMAT_R32_FLOAT                    ///< 32 bit per channel, 1 channel float format
+} FfxSurfaceFormat;
+
+/// An enumeration of resource usage.
+typedef enum FfxResourceUsage {
+
+    FFX_RESOURCE_USAGE_READ_ONLY = 0,               ///< No usage flags indicate a resource is read only.
+    FFX_RESOURCE_USAGE_RENDERTARGET = (1<<0),       ///< Indicates a resource will be used as render target.
+    FFX_RESOURCE_USAGE_UAV = (1<<1),                ///< Indicates a resource will be used as UAV.
+} FfxResourceUsage;
+
+/// An enumeration of resource states.
+typedef enum FfxResourceStates {
+
+    FFX_RESOURCE_STATE_UNORDERED_ACCESS = (1<<0),   ///< Indicates a resource is in the state to be used as UAV.
+    FFX_RESOURCE_STATE_COMPUTE_READ = (1 << 1),     ///< Indicates a resource is in the state to be read by compute shaders.
+    FFX_RESOURCE_STATE_COPY_SRC = (1 << 2),         ///< Indicates a resource is in the state to be used as source in a copy command.
+    FFX_RESOURCE_STATE_COPY_DEST = (1 << 3),        ///< Indicates a resource is in the state to be used as destination in a copy command.
+    FFX_RESOURCE_STATE_GENERIC_READ = (FFX_RESOURCE_STATE_COPY_SRC | FFX_RESOURCE_STATE_COMPUTE_READ),  ///< Indicates a resource is in generic (slow) read state.
+} FfxResourceStates;
+
+/// An enumeration of surface dimensions.
+typedef enum FfxResourceDimension {
+
+    FFX_RESOURCE_DIMENSION_TEXTURE_1D,              ///< A resource with a single dimension.
+    FFX_RESOURCE_DIMENSION_TEXTURE_2D,              ///< A resource with two dimensions.
+} FfxResourceDimension;
+
+/// An enumeration of resource flags.
+typedef enum FfxResourceFlags {
+
+    FFX_RESOURCE_FLAGS_NONE         = 0,            ///< No flags.
+    FFX_RESOURCE_FLAGS_ALIASABLE    = (1<<0),       ///< A bit indicating a resource does not need to persist across frames.
+} FfxResourceFlags;
+
+/// An enumeration of all resource view types.
+typedef enum FfxResourceViewType {
+
+    FFX_RESOURCE_VIEW_UNORDERED_ACCESS,             ///< The resource view is an unordered access view (UAV).
+    FFX_RESOURCE_VIEW_SHADER_READ,                  ///< The resource view is a shader resource view (SRV).
+} FfxResourceViewType;
+
+/// The type of filtering to perform when reading a texture.
+typedef enum FfxFilterType {
+
+    FFX_FILTER_TYPE_POINT,                          ///< Point sampling.
+    FFX_FILTER_TYPE_LINEAR                          ///< Sampling with interpolation.
+} FfxFilterType;
+
+/// An enumeration of all supported shader models.
+typedef enum FfxShaderModel {
+
+    FFX_SHADER_MODEL_5_1,                           ///< Shader model 5.1.
+    FFX_SHADER_MODEL_6_0,                           ///< Shader model 6.0.
+    FFX_SHADER_MODEL_6_1,                           ///< Shader model 6.1.
+    FFX_SHADER_MODEL_6_2,                           ///< Shader model 6.2.
+    FFX_SHADER_MODEL_6_3,                           ///< Shader model 6.3.
+    FFX_SHADER_MODEL_6_4,                           ///< Shader model 6.4.
+    FFX_SHADER_MODEL_6_5,                           ///< Shader model 6.5.
+    FFX_SHADER_MODEL_6_6,                           ///< Shader model 6.6.
+    FFX_SHADER_MODEL_6_7,                           ///< Shader model 6.7.
+} FfxShaderModel;
+
+/// An enumeration for different resource types
+typedef enum FfxResourceType {
+
+    FFX_RESOURCE_TYPE_BUFFER,                       ///< The resource is a buffer.
+    FFX_RESOURCE_TYPE_TEXTURE1D,                    ///< The resource is a 1-dimensional texture.
+    FFX_RESOURCE_TYPE_TEXTURE2D,                    ///< The resource is a 2-dimensional texture.
+    FFX_RESOURCE_TYPE_TEXTURE3D,                    ///< The resource is a 3-dimensional texture.
+} FfxResourceType;
+
+/// An enumeration for different heap types
+typedef enum FfxHeapType {
+
+    FFX_HEAP_TYPE_DEFAULT = 0,                      ///< Local memory.
+    FFX_HEAP_TYPE_UPLOAD                            ///< Heap used for uploading resources.
+} FfxHeapType;
+
+/// An enumeration for different render job types
+typedef enum FfxGpuJobType {
+
+    FFX_GPU_JOB_CLEAR_FLOAT = 0,                 ///< The GPU job is performing a floating-point clear.
+    FFX_GPU_JOB_COPY = 1,                        ///< The GPU job is performing a copy.
+    FFX_GPU_JOB_COMPUTE = 2,                     ///< The GPU job is performing a compute dispatch.
+} FfxGpuJobType;
+
+/// A typedef representing the graphics device.
+typedef void* FfxDevice;
+
+/// A typedef representing a command list or command buffer.
+typedef void* FfxCommandList;
+
+/// A typedef for a root signature.
+typedef void* FfxRootSignature;
+
+/// A typedef for a pipeline state object.
+typedef void* FfxPipeline;
+
+/// A structure encapsulating a collection of device capabilities.
+typedef struct FfxDeviceCapabilities {
+
+    FfxShaderModel                  minimumSupportedShaderModel;            ///< The minimum shader model supported by the device.
+    uint32_t                        waveLaneCountMin;                       ///< The minimum supported wavefront width.
+    uint32_t                        waveLaneCountMax;                       ///< The maximum supported wavefront width.
+    bool                            fp16Supported;                          ///< The device supports FP16 in hardware.
+    bool                            raytracingSupported;                    ///< The device supports raytracing.
+} FfxDeviceCapabilities;
+
+/// A structure encapsulating a 2-dimensional point, using 32bit unsigned integers.
+typedef struct FfxDimensions2D {
+
+    uint32_t                        width;                                  ///< The width of a 2-dimensional range.
+    uint32_t                        height;                                 ///< The height of a 2-dimensional range.
+} FfxDimensions2D;
+
+/// A structure encapsulating a 2-dimensional point, using 32bit signed integers.
+typedef struct FfxIntCoords2D {
+
+    int32_t                         x;                                      ///< The x coordinate of a 2-dimensional point.
+    int32_t                         y;                                      ///< The y coordinate of a 2-dimensional point.
+} FfxIntCoords2D;
+
+/// A structure encapsulating a 2-dimensional set of floating point coordinates.
+typedef struct FfxFloatCoords2D {
+
+    float                           x;                                      ///< The x coordinate of a 2-dimensional point.
+    float                           y;                                      ///< The y coordinate of a 2-dimensional point.
+} FfxFloatCoords2D;
+
+/// A structure describing a resource.
+typedef struct FfxResourceDescription {
+
+    FfxResourceType                 type;                                   ///< The type of the resource.
+    FfxSurfaceFormat                format;                                 ///< The surface format.
+    uint32_t                        width;                                  ///< The width of the resource.
+    uint32_t                        height;                                 ///< The height of the resource.
+    uint32_t                        depth;                                  ///< The depth of the resource.
+    uint32_t                        mipCount;                               ///< Number of mips (or 0 for full mipchain).
+    FfxResourceFlags                flags;                                  ///< A set of <c><i>FfxResourceFlags</i></c> flags.
+} FfxResourceDescription;
+
+/// An outward facing structure containing a resource
+typedef struct FfxResource {
+    void*                           resource;                               ///< pointer to the resource.
+    wchar_t                         name[64];
+    FfxResourceDescription          description;
+    FfxResourceStates               state;
+    bool                            isDepth;
+    uint64_t                        descriptorData;
+} FfxResource;
+
+/// An internal structure containing a handle to a resource and resource views
+typedef struct FfxResourceInternal {
+    int32_t                         internalIndex;                          ///< The index of the resource.
+} FfxResourceInternal;
+
+
+/// A structure defining a resource bind point
+typedef struct FfxResourceBinding
+{
+    uint32_t    slotIndex;
+    uint32_t    resourceIdentifier;
+    wchar_t     name[64];
+}FfxResourceBinding;
+
+/// A structure encapsulating a single pass of an algorithm.
+typedef struct FfxPipelineState {
+
+    FfxRootSignature                rootSignature;                                  ///< The pipelines rootSignature
+    FfxPipeline                     pipeline;                                       ///< The pipeline object
+    uint32_t                        uavCount;                                       ///< Count of UAVs used in this pipeline
+    uint32_t                        srvCount;                                       ///< Count of SRVs used in this pipeline
+    uint32_t                        constCount;                                     ///< Count of constant buffers used in this pipeline
+
+    FfxResourceBinding              uavResourceBindings[FFX_MAX_NUM_UAVS];          ///< Array of ResourceIdentifiers bound as UAVs
+    FfxResourceBinding              srvResourceBindings[FFX_MAX_NUM_SRVS];          ///< Array of ResourceIdentifiers bound as SRVs
+    FfxResourceBinding              cbResourceBindings[FFX_MAX_NUM_CONST_BUFFERS];  ///< Array of ResourceIdentifiers bound as CBs
+} FfxPipelineState;
+
+/// A structure containing the data required to create a resource.
+typedef struct FfxCreateResourceDescription {
+    
+    FfxHeapType                     heapType;                               ///< The heap type to hold the resource, typically <c><i>FFX_HEAP_TYPE_DEFAULT</i></c>.
+    FfxResourceDescription          resourceDescription;                    ///< A resource description.
+    FfxResourceStates               initalState;                            ///< The initial resource state.
+    uint32_t                        initDataSize;                           ///< Size of initial data buffer.
+    void*                           initData;                               ///< Buffer containing data to fill the resource.
+    const wchar_t*                  name;                                   ///< Name of the resource.
+    FfxResourceUsage                usage;                                  ///< Resource usage flags.
+    uint32_t                        id;                                     ///< Internal resource ID.
+} FfxCreateResourceDescription;
+
+/// A structure containing the description used to create a
+/// <c><i>FfxPipeline</i></c> structure.
+///
+/// A pipeline is the name given to a shader and the collection of state that
+/// is required to dispatch it. In the context of FSR2 and its architecture
+/// this means that a <c><i>FfxPipelineDescription</i></c> will map to either a
+/// monolithic object in an explicit API (such as a
+/// <c><i>PipelineStateObject</i></c> in DirectX 12), or a shader and some
+/// ancillary API objects (in something like DirectX 11).
+///
+/// The <c><i>contextFlags</i></c> field contains a copy of the flags passed
+/// to <c><i>ffxFsr2ContextCreate</i></c> via the <c><i>flags</i></c> field of
+/// the <c><i>FfxFsr2InitializationParams</i></c> structure. These flags are
+/// used to determine which permutation of a pipeline for a specific
+/// <c><i>FfxFsr2Pass</i></c> should be used to implement the features required
+/// by each application, as well as to achieve the best performance on specific
+/// target hardware configurations.
+///
+/// When using one of the provided backends for FSR2 (such as DirectX 12 or
+/// Vulkan) the data required to create a pipeline is compiled offline and
+/// included into the backend library that you are using. For cases where the
+/// backend interface is overridden by providing custom callback function
+/// implementations care should be taken to respect the contents of the
+/// <c><i>contextFlags</i></c> field in order to correctly support the options
+/// provided by FSR2, and achieve best performance.
+///
+/// @ingroup FSR2
+typedef struct FfxPipelineDescription {
+
+    uint32_t                            contextFlags;                   ///< A collection of <c><i>FfxFsr2InitializationFlagBits</i></c> which were passed to the context.
+    FfxFilterType*                      samplers;                       ///< Array of static samplers.
+    size_t                              samplerCount;                   ///< The number of samplers contained inside <c><i>samplers</i></c>.
+    const uint32_t*                     rootConstantBufferSizes;        ///< Array containing the sizes of the root constant buffers (count of 32 bit elements).
+    uint32_t                            rootConstantBufferCount;        ///< The number of root constants contained within <c><i>rootConstantBufferSizes</i></c>.
+} FfxPipelineDescription;
+
+/// A structure containing a constant buffer.
+typedef struct FfxConstantBuffer {
+
+    uint32_t                        uint32Size;                             ///< Size of 32 bit chunks used in the constant buffer
+    uint32_t                        data[FFX_MAX_CONST_SIZE];               ///< Constant buffer data
+}FfxConstantBuffer;
+
+/// A structure describing a clear render job.
+typedef struct FfxClearFloatJobDescription {
+
+    float                           color[4];                               ///< The clear color of the resource.
+    FfxResourceInternal             target;                                 ///< The resource to be cleared.
+} FfxClearFloatJobDescription;
+
+/// A structure describing a compute render job.
+typedef struct FfxComputeJobDescription {
+
+    FfxPipelineState                pipeline;                               ///< Compute pipeline for the render job.
+    uint32_t                        dimensions[3];                          ///< Dispatch dimensions.
+    FfxResourceInternal             srvs[FFX_MAX_NUM_SRVS];                 ///< SRV resources to be bound in the compute job.
+    wchar_t                         srvNames[FFX_MAX_NUM_SRVS][64];
+    FfxResourceInternal             uavs[FFX_MAX_NUM_UAVS];                 ///< UAV resources to be bound in the compute job.
+    uint32_t                        uavMip[FFX_MAX_NUM_UAVS];               ///< Mip level of UAV resources to be bound in the compute job.
+    wchar_t                         uavNames[FFX_MAX_NUM_UAVS][64];
+    FfxConstantBuffer               cbs[FFX_MAX_NUM_CONST_BUFFERS];         ///< Constant buffers to be bound in the compute job.
+    wchar_t                         cbNames[FFX_MAX_NUM_CONST_BUFFERS][64];
+    uint32_t                        cbSlotIndex[FFX_MAX_NUM_CONST_BUFFERS]; ///< Slot index in the descriptor table
+} FfxComputeJobDescription;
+
+/// A structure describing a copy render job.
+typedef struct FfxCopyJobDescription
+{
+    FfxResourceInternal                     src;                                    ///< Source resource for the copy.
+    FfxResourceInternal                     dst;                                    ///< Destination resource for the copy.
+} FfxCopyJobDescription;
+
+/// A structure describing a single render job.
+typedef struct FfxGpuJobDescription{
+
+    FfxGpuJobType                jobType;                                    ///< Type of the job.
+
+    union {
+        FfxClearFloatJobDescription clearJobDescriptor;                     ///< Clear job descriptor. Valid when <c><i>jobType</i></c> is <c><i>FFX_GPU_JOB_CLEAR_FLOAT</i></c>.
+        FfxCopyJobDescription       copyJobDescriptor;                      ///< Copy job descriptor. Valid when <c><i>jobType</i></c> is <c><i>FFX_GPU_JOB_COPY</i></c>.
+        FfxComputeJobDescription    computeJobDescriptor;                   ///< Compute job descriptor. Valid when <c><i>jobType</i></c> is <c><i>FFX_GPU_JOB_COMPUTE</i></c>.
+    };
+} FfxGpuJobDescription;
+
+#ifdef __cplusplus
+}
+#endif  // #ifdef __cplusplus
diff --git a/thirdparty/amd-fsr2/ffx_util.h b/thirdparty/amd-fsr2/ffx_util.h
new file mode 100644
index 0000000000..ca4324ea83
--- /dev/null
+++ b/thirdparty/amd-fsr2/ffx_util.h
@@ -0,0 +1,78 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#pragma once
+
+#include "ffx_types.h"
+
+/// The value of Pi. Declared static so that including this header from several C translation units does not create duplicate external definitions.
+static const float FFX_PI = 3.141592653589793f;
+
+/// An epsilon value for floating point numbers. Declared static so each including translation unit gets its own internal-linkage copy.
+static const float FFX_EPSILON = 1e-06f;
+
+/// Helper macro to create the version number: major is shifted left by 22 bits, minor by 12 bits, patch is OR-ed in unshifted. Arguments are parenthesized so expressions can be passed safely.
+#define FFX_MAKE_VERSION(major, minor, patch) (((major) << 22) | ((minor) << 12) | (patch))
+
+/// Use this to specify no version.
+#define FFX_UNSPECIFIED_VERSION     0xFFFFAD00
+
+/// Helper macro to avoid warnings about unused variables.
+#define FFX_UNUSED(x)               ((void)(x))
+
+/// Helper macro to align an integer to the specified power of 2 boundary
+#define FFX_ALIGN_UP(x, y)          (((x) + ((y)-1)) & ~((y)-1))
+
+/// Helper macro named as an alignment check. NOTE(review): as written it is true only when x is non-zero and has more than one bit set (i.e. x is not a power of two) - confirm intent before relying on it.
+#define FFX_IS_ALIGNED(x)           (((x) != 0) && ((x) & ((x)-1)))
+
+/// Helper macro to stringify a value.
+#define FFX_STR(s)                  FFX_XSTR(s)
+#define FFX_XSTR(s)                 #s
+
+/// Helper macro to forward declare a structure.
+#define FFX_FORWARD_DECLARE(x)      typedef struct x x
+
+/// Helper macro to return the maximum of two values.
+#define FFX_MAXIMUM(x, y)           (((x) > (y)) ? (x) : (y))
+
+/// Helper macro to return the minimum of two values.
+#define FFX_MINIMUM(x, y)           (((x) < (y)) ? (x) : (y))
+
+/// Helper macro to do safe free on a pointer. free(NULL) is a no-op, so no NULL test is needed; do/while makes the macro a single statement that is safe before an else.
+#define FFX_SAFE_FREE(x) \
+    do { free(x); }      \
+    while (0)
+
+/// Helper macro to return the abs of an integer value.
+#define FFX_ABSOLUTE(x)                 (((x) < 0) ? (-(x)) : (x))
+
+/// Helper macro to return sign of a value.
+#define FFX_SIGN(x)                     (((x) < 0) ? -1 : 1)
+
+/// Helper macro to work out the number of elements in an array.
+#define FFX_ARRAY_ELEMENTS(x)           (int32_t)((sizeof(x) / sizeof(0 [x])) / ((size_t)(!(sizeof(x) % sizeof(0 [x])))))
+
+/// The maximum length of a path that can be specified to the FidelityFX API.
+#define FFX_MAXIMUM_PATH                (260)
+
+/// Helper macro to check if every bit of the specified key is set in a bitfield. Arguments are parenthesized so composite keys such as (A | B) are handled correctly.
+#define FFX_CONTAINS_FLAG(options, key) (((options) & (key)) == (key))
diff --git a/thirdparty/amd-fsr2/patches/godot-changes.patch b/thirdparty/amd-fsr2/patches/godot-changes.patch
new file mode 100644
index 0000000000..513d8a5a29
--- /dev/null
+++ b/thirdparty/amd-fsr2/patches/godot-changes.patch
diff --git a/thirdparty/amd-fsr2/shaders/ffx_common_types.h b/thirdparty/amd-fsr2/shaders/ffx_common_types.h
new file mode 100644
index 0000000000..ddd17862b6
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_common_types.h
@@ -0,0 +1,429 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+#ifndef FFX_COMMON_TYPES_H
+#define FFX_COMMON_TYPES_H
+
+#if defined(FFX_CPU)
+#define FFX_PARAMETER_IN
+#define FFX_PARAMETER_OUT
+#define FFX_PARAMETER_INOUT
+#elif defined(FFX_HLSL)
+#define FFX_PARAMETER_IN        in
+#define FFX_PARAMETER_OUT       out
+#define FFX_PARAMETER_INOUT     inout
+#elif defined(FFX_GLSL)
+#define FFX_PARAMETER_IN        in
+#define FFX_PARAMETER_OUT       out
+#define FFX_PARAMETER_INOUT     inout
+#endif // #if defined(FFX_CPU)
+
+#if defined(FFX_CPU)
+/// A typedef for a boolean value.
+///
+/// @ingroup CPU
+typedef bool FfxBoolean;
+
+/// A typedef for a unsigned 8bit integer.
+///
+/// @ingroup CPU
+typedef uint8_t FfxUInt8;
+
+/// A typedef for a unsigned 16bit integer.
+///
+/// @ingroup CPU
+typedef uint16_t FfxUInt16;
+
+/// A typedef for a unsigned 32bit integer.
+///
+/// @ingroup CPU
+typedef uint32_t FfxUInt32;
+
+/// A typedef for a unsigned 64bit integer.
+///
+/// @ingroup CPU
+typedef uint64_t FfxUInt64;
+
+/// A typedef for a signed 8bit integer.
+///
+/// @ingroup CPU
+typedef int8_t FfxInt8;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup CPU
+typedef int16_t FfxInt16;
+
+/// A typedef for a signed 32bit integer.
+///
+/// @ingroup CPU
+typedef int32_t FfxInt32;
+
+/// A typedef for a signed 64bit integer.
+///
+/// @ingroup CPU
+typedef int64_t FfxInt64;
+
+/// A typedef for a floating point value.
+///
+/// @ingroup CPU
+typedef float FfxFloat32;
+
+/// A typedef for a 2-dimensional floating point value.
+///
+/// @ingroup CPU
+typedef float FfxFloat32x2[2];
+
+/// A typedef for a 3-dimensional floating point value.
+///
+/// @ingroup CPU
+typedef float FfxFloat32x3[3];
+
+/// A typedef for a 4-dimensional floating point value.
+///
+/// @ingroup CPU
+typedef float FfxFloat32x4[4];
+
+/// A typedef for a 2-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPU
+typedef uint32_t FfxUInt32x2[2];
+
+/// A typedef for a 3-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPU
+typedef uint32_t FfxUInt32x3[3];
+
+/// A typedef for a 4-dimensional 32bit unsigned integer.
+///
+/// @ingroup CPU
+typedef uint32_t FfxUInt32x4[4];
+#endif // #if defined(FFX_CPU)
+
+#if defined(FFX_HLSL)
+/// A typedef for a boolean value.
+///
+/// @ingroup GPU
+typedef bool FfxBoolean;
+
+#if FFX_HLSL_6_2
+typedef float32_t   FfxFloat32;
+typedef float32_t2  FfxFloat32x2;
+typedef float32_t3  FfxFloat32x3;
+typedef float32_t4  FfxFloat32x4;
+
+/// A typedef for a unsigned 32bit integer.
+///
+/// @ingroup GPU
+typedef uint32_t    FfxUInt32;
+typedef uint32_t2   FfxUInt32x2;
+typedef uint32_t3   FfxUInt32x3;
+typedef uint32_t4   FfxUInt32x4;
+typedef int32_t     FfxInt32;
+typedef int32_t2    FfxInt32x2;
+typedef int32_t3    FfxInt32x3;
+typedef int32_t4    FfxInt32x4;
+#else
+#define FfxFloat32   float
+#define FfxFloat32x2 float2
+#define FfxFloat32x3 float3
+#define FfxFloat32x4 float4
+
+/// A typedef for a unsigned 32bit integer.
+///
+/// @ingroup GPU
+typedef uint        FfxUInt32;
+typedef uint2       FfxUInt32x2;
+typedef uint3       FfxUInt32x3;
+typedef uint4       FfxUInt32x4;
+typedef int         FfxInt32;
+typedef int2        FfxInt32x2;
+typedef int3        FfxInt32x3;
+typedef int4        FfxInt32x4;
+#endif // #if FFX_HLSL_6_2
+
+#if FFX_HALF
+#if FFX_HLSL_6_2
+typedef float16_t   FfxFloat16;
+typedef float16_t2  FfxFloat16x2;
+typedef float16_t3  FfxFloat16x3;
+typedef float16_t4  FfxFloat16x4;
+
+/// A typedef for an unsigned 16bit integer.
+///
+/// @ingroup GPU
+typedef uint16_t    FfxUInt16;
+typedef uint16_t2   FfxUInt16x2;
+typedef uint16_t3   FfxUInt16x3;
+typedef uint16_t4   FfxUInt16x4;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup GPU
+typedef int16_t     FfxInt16;
+typedef int16_t2    FfxInt16x2;
+typedef int16_t3    FfxInt16x3;
+typedef int16_t4    FfxInt16x4;
+#else
+typedef min16float  FfxFloat16;
+typedef min16float2 FfxFloat16x2;
+typedef min16float3 FfxFloat16x3;
+typedef min16float4 FfxFloat16x4;
+
+/// A typedef for an unsigned 16bit integer.
+///
+/// @ingroup GPU
+typedef min16uint   FfxUInt16;
+typedef min16uint2  FfxUInt16x2;
+typedef min16uint3  FfxUInt16x3;
+typedef min16uint4  FfxUInt16x4;
+
+/// A typedef for a signed 16bit integer.
+///
+/// @ingroup GPU
+typedef min16int    FfxInt16;
+typedef min16int2   FfxInt16x2;
+typedef min16int3   FfxInt16x3;
+typedef min16int4   FfxInt16x4;
+#endif  // FFX_HLSL_6_2
+#endif // FFX_HALF
+#endif // #if defined(FFX_HLSL)
+
+#if defined(FFX_GLSL)
+/// A typedef for a boolean value.
+///
+/// @ingroup GPU
+#define FfxBoolean   bool
+#define FfxFloat32   float
+#define FfxFloat32x2 vec2
+#define FfxFloat32x3 vec3
+#define FfxFloat32x4 vec4
+#define FfxUInt32    uint
+#define FfxUInt32x2  uvec2
+#define FfxUInt32x3  uvec3
+#define FfxUInt32x4  uvec4
+#define FfxInt32     int
+#define FfxInt32x2   ivec2
+#define FfxInt32x3   ivec3
+#define FfxInt32x4   ivec4
+#if FFX_HALF
+#define FfxFloat16   float16_t
+#define FfxFloat16x2 f16vec2
+#define FfxFloat16x3 f16vec3
+#define FfxFloat16x4 f16vec4
+#define FfxUInt16    uint16_t
+#define FfxUInt16x2  u16vec2
+#define FfxUInt16x3  u16vec3
+#define FfxUInt16x4  u16vec4
+#define FfxInt16     int16_t
+#define FfxInt16x2   i16vec2
+#define FfxInt16x3   i16vec3
+#define FfxInt16x4   i16vec4
+#endif // FFX_HALF
+#endif // #if defined(FFX_GLSL)
+
+// Global toggles consumed by the #if tests below:
+// #define FFX_HALF            (1)
+// #define FFX_HLSL_6_2        (1)
+
+#if FFX_HALF
+
+#if FFX_HLSL_6_2 // build the aliases from explicit <BaseComponentType>16_t names
+
+#define FFX_MIN16_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType##16_t TypeName;
+#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType##16_t, COL> TypeName;
+#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
+
+#define FFX_16BIT_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType##16_t TypeName;
+#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType##16_t, COL> TypeName;
+#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType##16_t, ROW, COL> TypeName;
+
+#else // !FFX_HLSL_6_2: build the aliases from min16<BaseComponentType> names instead
+
+#define FFX_MIN16_SCALAR( TypeName, BaseComponentType )           typedef min16##BaseComponentType TypeName;
+#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<min16##BaseComponentType, COL> TypeName;
+#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<min16##BaseComponentType, ROW, COL> TypeName;
+
+#define FFX_16BIT_SCALAR( TypeName, BaseComponentType )           FFX_MIN16_SCALAR( TypeName, BaseComponentType );
+#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL )      FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL );
+#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL );
+
+#endif //FFX_HLSL_6_2
+
+#else // !FFX_HALF: every alias is the unmodified base component type
+
+#define FFX_MIN16_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType TypeName;
+#define FFX_MIN16_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType, COL> TypeName;
+#define FFX_MIN16_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
+
+#define FFX_16BIT_SCALAR( TypeName, BaseComponentType )           typedef BaseComponentType TypeName;
+#define FFX_16BIT_VECTOR( TypeName, BaseComponentType, COL )      typedef vector<BaseComponentType, COL> TypeName;
+#define FFX_16BIT_MATRIX( TypeName, BaseComponentType, ROW, COL ) typedef matrix<BaseComponentType, ROW, COL> TypeName;
+
+#endif //FFX_HALF
+
+#if defined(FFX_GPU)
+// Common typedefs:
+#if defined(FFX_HLSL)
+FFX_MIN16_SCALAR( FFX_MIN16_F , float );
+FFX_MIN16_VECTOR( FFX_MIN16_F2, float, 2 );
+FFX_MIN16_VECTOR( FFX_MIN16_F3, float, 3 );
+FFX_MIN16_VECTOR( FFX_MIN16_F4, float, 4 );
+
+FFX_MIN16_SCALAR( FFX_MIN16_I,  int );
+FFX_MIN16_VECTOR( FFX_MIN16_I2, int, 2 );
+FFX_MIN16_VECTOR( FFX_MIN16_I3, int, 3 );
+FFX_MIN16_VECTOR( FFX_MIN16_I4, int, 4 );
+
+FFX_MIN16_SCALAR( FFX_MIN16_U,  uint );
+FFX_MIN16_VECTOR( FFX_MIN16_U2, uint, 2 );
+FFX_MIN16_VECTOR( FFX_MIN16_U3, uint, 3 );
+FFX_MIN16_VECTOR( FFX_MIN16_U4, uint, 4 );
+
+FFX_16BIT_SCALAR( FFX_F16_t , float );
+FFX_16BIT_VECTOR( FFX_F16_t2, float, 2 );
+FFX_16BIT_VECTOR( FFX_F16_t3, float, 3 );
+FFX_16BIT_VECTOR( FFX_F16_t4, float, 4 );
+
+FFX_16BIT_SCALAR( FFX_I16_t,  int );
+FFX_16BIT_VECTOR( FFX_I16_t2, int, 2 );
+FFX_16BIT_VECTOR( FFX_I16_t3, int, 3 );
+FFX_16BIT_VECTOR( FFX_I16_t4, int, 4 );
+
+FFX_16BIT_SCALAR( FFX_U16_t,  uint );
+FFX_16BIT_VECTOR( FFX_U16_t2, uint, 2 );
+FFX_16BIT_VECTOR( FFX_U16_t3, uint, 3 );
+FFX_16BIT_VECTOR( FFX_U16_t4, uint, 4 );
+
+#define TYPEDEF_MIN16_TYPES(Prefix)           /* Declares Prefix_F .. Prefix_U4 as aliases of the FFX_MIN16_* typedefs. */ \
+typedef FFX_MIN16_F     Prefix##_F;           \
+typedef FFX_MIN16_F2    Prefix##_F2;          \
+typedef FFX_MIN16_F3    Prefix##_F3;          \
+typedef FFX_MIN16_F4    Prefix##_F4;          \
+typedef FFX_MIN16_I     Prefix##_I;           \
+typedef FFX_MIN16_I2    Prefix##_I2;          \
+typedef FFX_MIN16_I3    Prefix##_I3;          \
+typedef FFX_MIN16_I4    Prefix##_I4;          \
+typedef FFX_MIN16_U     Prefix##_U;           \
+typedef FFX_MIN16_U2    Prefix##_U2;          \
+typedef FFX_MIN16_U3    Prefix##_U3;          \
+typedef FFX_MIN16_U4    Prefix##_U4;
+
+#define TYPEDEF_16BIT_TYPES(Prefix)           /* Declares Prefix_F .. Prefix_U4 as aliases of the FFX_F16_t / FFX_I16_t / FFX_U16_t typedefs declared above. */ \
+typedef FFX_F16_t       Prefix##_F;           \
+typedef FFX_F16_t2      Prefix##_F2;          \
+typedef FFX_F16_t3      Prefix##_F3;          \
+typedef FFX_F16_t4      Prefix##_F4;          \
+typedef FFX_I16_t       Prefix##_I;           \
+typedef FFX_I16_t2      Prefix##_I2;          \
+typedef FFX_I16_t3      Prefix##_I3;          \
+typedef FFX_I16_t4      Prefix##_I4;          \
+typedef FFX_U16_t       Prefix##_U;           \
+typedef FFX_U16_t2      Prefix##_U2;          \
+typedef FFX_U16_t3      Prefix##_U3;          \
+typedef FFX_U16_t4      Prefix##_U4;
+
+#define TYPEDEF_FULL_PRECISION_TYPES(Prefix)  /* Declares Prefix_F .. Prefix_U4 as aliases of FfxFloat32 / FfxInt32 / FfxUInt32 and their x2..x4 vector forms. */ \
+typedef FfxFloat32      Prefix##_F;           \
+typedef FfxFloat32x2    Prefix##_F2;          \
+typedef FfxFloat32x3    Prefix##_F3;          \
+typedef FfxFloat32x4    Prefix##_F4;          \
+typedef FfxInt32        Prefix##_I;           \
+typedef FfxInt32x2      Prefix##_I2;          \
+typedef FfxInt32x3      Prefix##_I3;          \
+typedef FfxInt32x4      Prefix##_I4;          \
+typedef FfxUInt32       Prefix##_U;           \
+typedef FfxUInt32x2     Prefix##_U2;          \
+typedef FfxUInt32x3     Prefix##_U3;          \
+typedef FfxUInt32x4     Prefix##_U4;
+#endif // #if defined(FFX_HLSL)
+
+#if defined(FFX_GLSL)
+
+#if FFX_HALF
+
+#define  FFX_MIN16_F  float16_t
+#define  FFX_MIN16_F2 f16vec2
+#define  FFX_MIN16_F3 f16vec3
+#define  FFX_MIN16_F4 f16vec4
+
+#define  FFX_MIN16_I  int16_t
+#define  FFX_MIN16_I2 i16vec2
+#define  FFX_MIN16_I3 i16vec3
+#define  FFX_MIN16_I4 i16vec4
+
+#define  FFX_MIN16_U  uint16_t
+#define  FFX_MIN16_U2 u16vec2
+#define  FFX_MIN16_U3 u16vec3
+#define  FFX_MIN16_U4 u16vec4
+
+#define FFX_16BIT_F  float16_t
+#define FFX_16BIT_F2 f16vec2
+#define FFX_16BIT_F3 f16vec3
+#define FFX_16BIT_F4 f16vec4
+
+#define FFX_16BIT_I  int16_t
+#define FFX_16BIT_I2 i16vec2
+#define FFX_16BIT_I3 i16vec3
+#define FFX_16BIT_I4 i16vec4
+
+#define FFX_16BIT_U  uint16_t
+#define FFX_16BIT_U2 u16vec2
+#define FFX_16BIT_U3 u16vec3
+#define FFX_16BIT_U4 u16vec4
+
+#else // FFX_HALF
+
+#define  FFX_MIN16_F  float
+#define  FFX_MIN16_F2 vec2
+#define  FFX_MIN16_F3 vec3
+#define  FFX_MIN16_F4 vec4
+
+#define  FFX_MIN16_I  int
+#define  FFX_MIN16_I2 ivec2
+#define  FFX_MIN16_I3 ivec3
+#define  FFX_MIN16_I4 ivec4
+
+#define  FFX_MIN16_U  uint
+#define  FFX_MIN16_U2 uvec2
+#define  FFX_MIN16_U3 uvec3
+#define  FFX_MIN16_U4 uvec4
+
+#define FFX_16BIT_F  float
+#define FFX_16BIT_F2 vec2
+#define FFX_16BIT_F3 vec3
+#define FFX_16BIT_F4 vec4
+
+#define FFX_16BIT_I  int
+#define FFX_16BIT_I2 ivec2
+#define FFX_16BIT_I3 ivec3
+#define FFX_16BIT_I4 ivec4
+
+#define FFX_16BIT_U  uint
+#define FFX_16BIT_U2 uvec2
+#define FFX_16BIT_U3 uvec3
+#define FFX_16BIT_U4 uvec4
+
+#endif // FFX_HALF
+
+#endif // #if defined(FFX_GLSL)
+
+#endif // #if defined(FFX_GPU)
+#endif // #ifndef FFX_COMMON_TYPES_H
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core.h b/thirdparty/amd-fsr2/shaders/ffx_core.h
new file mode 100644
index 0000000000..4e687d6e3d
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_core.h
@@ -0,0 +1,52 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+/// @defgroup Core
+/// @defgroup HLSL
+/// @defgroup GLSL
+/// @defgroup GPU
+/// @defgroup CPU
+/// @defgroup CAS
+/// @defgroup FSR1
+
+#if !defined(FFX_CORE_H)
+#define FFX_CORE_H
+
+#include "ffx_common_types.h"
+
+#if defined(FFX_CPU)
+    #include "ffx_core_cpu.h"
+#endif // #if defined(FFX_CPU)
+
+#if defined(FFX_GLSL) && defined(FFX_GPU)
+    #include "ffx_core_glsl.h"
+#endif // #if defined(FFX_GLSL) && defined(FFX_GPU)
+
+#if defined(FFX_HLSL) && defined(FFX_GPU)
+    #include "ffx_core_hlsl.h"
+#endif // #if defined(FFX_HLSL) && defined(FFX_GPU)
+
+#if defined(FFX_GPU)
+    #include "ffx_core_gpu_common.h"
+    #include "ffx_core_gpu_common_half.h"
+    #include "ffx_core_portability.h"
+#endif // #if defined(FFX_GPU)
+#endif // #if !defined(FFX_CORE_H)
+\ No newline at end of file
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_cpu.h b/thirdparty/amd-fsr2/shaders/ffx_core_cpu.h
new file mode 100644
index 0000000000..3bf0295bfc
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_cpu.h
@@ -0,0 +1,332 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+/// A define for a true value in a boolean expression.
+///
+/// @ingroup CPU
+#define FFX_TRUE (1)
+
+/// A define for a false value in a boolean expression.
+///
+/// @ingroup CPU
+#define FFX_FALSE (0)
+ 
+#if !defined(FFX_STATIC)
+/// A define to abstract declaration of static variables and functions.
+///
+/// @ingroup CPU
+#define FFX_STATIC static
+#endif // #if !defined(FFX_STATIC)
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif
+
+/// Interpret the bit layout of an IEEE-754 floating point value as an unsigned integer.
+///
+/// @param [in] x               A 32bit floating point value.
+///
+/// @returns
+/// An unsigned 32bit integer value containing the bit pattern of <c><i>x</i></c>.
+/// 
+/// @ingroup CPU
+FFX_STATIC FfxUInt32 ffxAsUInt32(FfxFloat32 x)
+{
+    union
+    {
+        FfxFloat32 f;
+        FfxUInt32  u;
+    } bits; // both members share storage, so no numeric conversion takes place
+
+    bits.f = x;
+    return bits.u;
+}
+
+FFX_STATIC FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b) // Dot product over components 0..1 of a and b.
+{
+    return a[0] * b[0] + a[1] * b[1];
+}
+
+FFX_STATIC FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b) // Dot product over components 0..2 of a and b.
+{
+    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
+}
+
+FFX_STATIC FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b) // Dot product over components 0..3 of a and b.
+{
+    return a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
+}
+
+/// Compute the linear interpolation between two values.
+///
+/// Evaluated as <c><i>y * t + (-x * t + x)</i></c>, which is a rearrangement of the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup CPU
+FFX_STATIC FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
+{
+    return y * t + (-x * t + x);
+}
+
+/// Compute the reciprocal of a value.
+///
+/// @param [in] a               The value to compute the reciprocal for.
+///
+/// @returns
+/// The reciprocal value of <c><i>a</i></c>, computed as <c><i>1.0f / a</i></c>.
+///
+/// @ingroup CPU
+FFX_STATIC FfxFloat32 ffxReciprocal(FfxFloat32 a)
+{
+    return 1.0f / a;
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup CPU
+FFX_STATIC FfxFloat32 ffxSqrt(FfxFloat32 x)
+{
+    return sqrt(x);
+}
+
+FFX_STATIC FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b) // Right shift of a by b, carried out after converting both to FfxInt32.
+{
+    return FfxUInt32(FfxInt32(a) >> FfxInt32(b)); // the signed result is converted back to FfxUInt32
+}
+
+/// Compute the fractional part of a decimal value.
+///
+/// This function calculates <c><i>a - floor(a)</i></c>.
+///
+/// @param [in] a               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>a</i></c>.
+///
+/// @ingroup CPU
+FFX_STATIC FfxFloat32 ffxFract(FfxFloat32 a)
+{
+    return a - floor(a);
+}
+
+/// Compute the reciprocal square root of a value.
+///
+/// @param [in] a               The value to compute the reciprocal square root for.
+///
+/// @returns
+/// The reciprocal square root value of <c><i>a</i></c>.
+///
+/// @ingroup CPU
+FFX_STATIC FfxFloat32 rsqrt(FfxFloat32 a)
+{
+    return ffxReciprocal(ffxSqrt(a)); // 1.0f / sqrt(a), via the two helpers
+}
+
+FFX_STATIC FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y) // Returns x when x < y, otherwise y.
+{
+    return x < y ? x : y;
+}
+
+FFX_STATIC FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y) // FfxUInt32 overload: returns x when x < y, otherwise y.
+{
+    return x < y ? x : y;
+}
+
+FFX_STATIC FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y) // Returns x when x > y, otherwise y.
+{
+    return x > y ? x : y;
+}
+
+FFX_STATIC FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y) // FfxUInt32 overload: returns x when x > y, otherwise y.
+{
+    return x > y ? x : y;
+}
+
+/// Clamp a value to a [0..1] range.
+///
+/// @param [in] a               The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>a</i></c>.
+///
+/// @ingroup CPU
+FFX_STATIC FfxFloat32 ffxSaturate(FfxFloat32 a)
+{
+    return ffxMin(1.0f, ffxMax(0.0f, a)); // raise to at least 0 first, then cap at 1
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+FFX_STATIC void opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) // d[i] = a[i] + b for i in 0..2
+{
+    d[0] = a[0] + b; // NOTE(review): only visible to the caller if FfxFloat32x3 is an array type - confirm
+    d[1] = a[1] + b;
+    d[2] = a[2] + b;
+    return;
+}
+
+FFX_STATIC void opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a) // d[i] = a[i] for i in 0..2
+{
+    d[0] = a[0]; // NOTE(review): only visible to the caller if FfxFloat32x3 is an array type - confirm
+    d[1] = a[1];
+    d[2] = a[2];
+    return;
+}
+
+FFX_STATIC void opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b) // d[i] = a[i] * b[i] for i in 0..2
+{
+    d[0] = a[0] * b[0]; // NOTE(review): only visible to the caller if FfxFloat32x3 is an array type - confirm
+    d[1] = a[1] * b[1];
+    d[2] = a[2] * b[2];
+    return;
+}
+
+FFX_STATIC void opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b) // d[i] = a[i] * b for i in 0..2
+{
+    d[0] = a[0] * b; // NOTE(review): only visible to the caller if FfxFloat32x3 is an array type - confirm
+    d[1] = a[1] * b;
+    d[2] = a[2] * b;
+    return;
+}
+
+FFX_STATIC void opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a) // d[i] = ffxReciprocal(a[i]) for i in 0..2
+{
+    d[0] = ffxReciprocal(a[0]); // NOTE(review): only visible to the caller if FfxFloat32x3 is an array type - confirm
+    d[1] = ffxReciprocal(a[1]);
+    d[2] = ffxReciprocal(a[2]);
+    return;
+}
+
+/// Convert FfxFloat32 to half (in lower 16-bits of output).
+/// 
+/// This function implements the same fast technique that is documented here: ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
+/// 
+/// The function supports denormals.
+/// 
+/// Some conversion rules are to make computations possibly "safer" on the GPU,
+///  -INF & -NaN -> -65504
+///  +INF & +NaN -> +65504
+///
+/// @param [in] f               The 32bit floating point value to convert.
+/// 
+/// @returns
+/// The 16bit floating point encoding of <c><i>f</i></c>; mantissa bits that do not fit are shifted out (truncated), not rounded.
+/// 
+/// @ingroup CPU
+FFX_STATIC FfxUInt32 f32tof16(FfxFloat32 f)
+{
+    static FfxUInt16 base[512] = {
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400,
+        0x0800, 0x0c00, 0x1000, 0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400, 0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000,
+        0x5400, 0x5800, 0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff,
+        0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x7bff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
+        0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002,
+        0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400, 0xa800, 0xac00,
+        0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00, 0xf000, 0xf400, 0xf800,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff,
+        0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff, 0xfbff
+    };
+    
+    static FfxUInt8 shift[512] = {
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
+        0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
+        0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+    };
+
+    union
+    {
+        FfxFloat32      f;
+        FfxUInt32 u;
+    } bits;
+
+    bits.f       = f;
+    FfxUInt32 u = bits.u;
+    FfxUInt32 i = u >> 23; // the 9 bits above the 23-bit mantissa field index both 512-entry tables
+    return (FfxUInt32)(base[i]) + ((u & 0x7fffff) >> shift[i]); // table base plus the low 23 bits shifted down by the per-entry amount
+}
+
+/// Pack 2x32-bit floating point values in a single 32bit value.
+///
+/// This function first converts each component of <c><i>a</i></c> into a 16-bit floating
+/// point representation (using f32tof16), and then stores the X and Y components in the lower and upper 16 bits of the
+/// 32bit unsigned integer respectively.
+///
+/// @param [in] a                   A 2-dimensional floating point value to convert and pack.
+///
+/// @returns
+/// A packed 32bit value containing 2 16bit floating point values.
+///
+/// @ingroup CPU
+FFX_STATIC FfxUInt32 packHalf2x16(FfxFloat32x2 a)
+{
+    return f32tof16(a[0]) + (f32tof16(a[1]) << 16);
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h b/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
new file mode 100644
index 0000000000..6ec58f3c62
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_glsl.h
@@ -0,0 +1,1669 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+/// A define for abstracting shared memory between shading languages.
+///
+/// @ingroup GPU
+#define FFX_GROUPSHARED shared
+
+/// A define for abstracting compute memory barriers between shading languages.
+///
+/// @ingroup GPU
+#define FFX_GROUP_MEMORY_BARRIER() barrier()
+
+/// A define added to accept static markup on functions to aid CPU/GPU portability of code.
+///
+/// @ingroup GPU
+#define FFX_STATIC
+
+/// A define for abstracting loop unrolling between shading languages.
+///
+/// @ingroup GPU 
+#define FFX_UNROLL
+
+/// A define for abstracting a 'greater than' comparison operator between two types.
+///
+/// @ingroup GPU
+#define FFX_GREATER_THAN(x, y) greaterThan(x, y)
+
+/// A define for abstracting a 'greater than or equal' comparison operator between two types.
+///
+/// @ingroup GPU
+#define FFX_GREATER_THAN_EQUAL(x, y) greaterThanEqual(x, y)
+
+/// A define for abstracting a 'less than' comparison operator between two types.
+///
+/// @ingroup GPU
+#define FFX_LESS_THAN(x, y) lessThan(x, y)
+
+/// A define for abstracting a 'less than or equal' comparison operator between two types.
+///
+/// @ingroup GPU
+#define FFX_LESS_THAN_EQUAL(x, y) lessThanEqual(x, y)
+
+/// A define for abstracting an 'equal' comparison operator between two types.
+///
+/// @ingroup GPU
+#define FFX_EQUAL(x, y) equal(x, y)
+
+/// A define for abstracting a 'not equal' comparison operator between two types.
+///
+/// @ingroup GPU
+#define FFX_NOT_EQUAL(x, y) notEqual(x, y)
+
+/// Broadcast a scalar value to a 1-dimensional floating point vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_FLOAT32(x)   FfxFloat32(x)
+
+/// Broadcast a scalar value to a 2-dimensional floating point vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32x2(FfxFloat32(x))
+
+/// Broadcast a scalar value to a 3-dimensional floating point vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32x3(FfxFloat32(x))
+
+/// Broadcast a scalar value to a 4-dimensional floating point vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32x4(FfxFloat32(x))
+
+/// Broadcast a scalar value to a 1-dimensional unsigned integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_UINT32(x)   FfxUInt32(x)
+
+/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_UINT32X2(x) FfxUInt32x2(FfxUInt32(x))
+
+/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_UINT32X3(x) FfxUInt32x3(FfxUInt32(x))
+
+/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_UINT32X4(x) FfxUInt32x4(FfxUInt32(x))
+
+/// Broadcast a scalar value to a 1-dimensional signed integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_INT32(x)   FfxInt32(x)
+
+/// Broadcast a scalar value to a 2-dimensional signed integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_INT32X2(x) FfxInt32x2(FfxInt32(x))
+
+/// Broadcast a scalar value to a 3-dimensional signed integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_INT32X3(x) FfxInt32x3(FfxInt32(x))
+
+/// Broadcast a scalar value to a 4-dimensional signed integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_INT32X4(x) FfxInt32x4(FfxInt32(x))
+
+/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_FLOAT16(x)   FFX_MIN16_F(x)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_FLOAT16X2(x) FFX_MIN16_F2(FFX_MIN16_F(x))
+
+/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_FLOAT16X3(x) FFX_MIN16_F3(FFX_MIN16_F(x))
+
+/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_FLOAT16X4(x) FFX_MIN16_F4(FFX_MIN16_F(x))
+
+/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_UINT16(x)   FFX_MIN16_U(x)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_UINT16X2(x) FFX_MIN16_U2(FFX_MIN16_U(x))
+
+/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_UINT16X3(x) FFX_MIN16_U3(FFX_MIN16_U(x))
+
+/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_UINT16X4(x) FFX_MIN16_U4(FFX_MIN16_U(x))
+
+/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_INT16(x)   FFX_MIN16_I(x)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_INT16X2(x) FFX_MIN16_I2(FFX_MIN16_I(x))
+
+/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_INT16X3(x) FFX_MIN16_I3(FFX_MIN16_I(x))
+
+/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector.
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_INT16X4(x) FFX_MIN16_I4(FFX_MIN16_I(x))
+
+#if !defined(FFX_SKIP_EXT)
+#if FFX_HALF
+    #extension GL_EXT_shader_16bit_storage : require
+    #extension GL_EXT_shader_explicit_arithmetic_types : require
+#endif // FFX_HALF
+
+#if defined(FFX_LONG)
+    #extension GL_ARB_gpu_shader_int64 : require
+    #extension GL_NV_shader_atomic_int64 : require
+#endif // #if defined(FFX_LONG)
+
+#if defined(FFX_WAVE)
+    #extension GL_KHR_shader_subgroup_arithmetic : require
+    #extension GL_KHR_shader_subgroup_ballot : require
+    #extension GL_KHR_shader_subgroup_quad : require
+    #extension GL_KHR_shader_subgroup_shuffle : require
+#endif // #if defined(FFX_WAVE)
+#endif // #if !defined(FFX_SKIP_EXT)
+
+// Forward declarations
+FfxFloat32   ffxSqrt(FfxFloat32 x);
+FfxFloat32x2 ffxSqrt(FfxFloat32x2 x);
+FfxFloat32x3 ffxSqrt(FfxFloat32x3 x);
+FfxFloat32x4 ffxSqrt(FfxFloat32x4 x);
+
+/// Interprets the bit pattern of x as a floating-point number.
+///
+/// @param [in] x                   The input value.
+///
+/// @returns
+/// The input interpreted as a floating-point number.
+///
+/// @ingroup GLSL
+FfxFloat32 ffxAsFloat(FfxUInt32 x)
+{
+    return uintBitsToFloat(x);
+}
+
+/// Interprets the bit pattern of each component of x as a floating-point number (2-component overload).
+///
+/// @param [in] x                   The unsigned integer vector whose bit patterns are reinterpreted.
+///
+/// @returns
+/// The result of the GLSL <c><i>uintBitsToFloat</i></c> built-in applied to <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
+{
+    return uintBitsToFloat(x);
+}
+
+/// Interprets the bit pattern of each component of x as a floating-point number (3-component overload).
+///
+/// @param [in] x                   The unsigned integer vector whose bit patterns are reinterpreted.
+///
+/// @returns
+/// The result of the GLSL <c><i>uintBitsToFloat</i></c> built-in applied to <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
+{
+    return uintBitsToFloat(x);
+}
+
+/// Interprets the bit pattern of each component of x as a floating-point number (4-component overload).
+///
+/// @param [in] x                   The unsigned integer vector whose bit patterns are reinterpreted.
+///
+/// @returns
+/// The result of the GLSL <c><i>uintBitsToFloat</i></c> built-in applied to <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
+{
+    return uintBitsToFloat(x);
+}
+
+/// Interprets the bit pattern of x as an unsigned integer (a reinterpretation, not a numeric conversion).
+///
+/// @param [in] x                   The floating-point value whose bit pattern is reinterpreted.
+///
+/// @returns
+/// The result of the GLSL <c><i>floatBitsToUint</i></c> built-in applied to <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32 ffxAsUInt32(FfxFloat32 x)
+{
+    return floatBitsToUint(x);
+}
+
+/// Interprets the bit pattern of each component of x as an unsigned integer (2-component overload).
+///
+/// @param [in] x                   The floating-point vector whose bit patterns are reinterpreted.
+///
+/// @returns
+/// The result of the GLSL <c><i>floatBitsToUint</i></c> built-in applied to <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
+{
+    return floatBitsToUint(x);
+}
+
+/// Interprets the bit pattern of each component of x as an unsigned integer (3-component overload).
+///
+/// @param [in] x                   The floating-point vector whose bit patterns are reinterpreted.
+///
+/// @returns
+/// The result of the GLSL <c><i>floatBitsToUint</i></c> built-in applied to <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
+{
+    return floatBitsToUint(x);
+}
+
+/// Interprets the bit pattern of each component of x as an unsigned integer (4-component overload).
+///
+/// @param [in] x                   The floating-point vector whose bit patterns are reinterpreted.
+///
+/// @returns
+/// The result of the GLSL <c><i>floatBitsToUint</i></c> built-in applied to <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
+{
+    return floatBitsToUint(x);
+}
+
+/// Convert a 32bit IEEE 754 floating point value to its nearest 16bit equivalent.
+///
+/// @param [in] value               The value to convert.
+///
+/// @returns
+/// The result of <c><i>packHalf2x16(value, 0.0)</i></c>: the 16bit encoding of <c><i>value</i></c> sits in the low 16 bits.
+///
+/// @ingroup GLSL
+FfxUInt32 f32tof16(FfxFloat32 value)
+{
+    return packHalf2x16(FfxFloat32x2(value, 0.0));
+}
+
+/// Broadcast a scalar value to a 2-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 2-dimensional floating point vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup GLSL
+FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
+{
+    return FfxFloat32x2(value, value);
+}
+
+/// Broadcast a scalar value to a 3-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 3-dimensional floating point vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup GLSL
+FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
+{
+    return FfxFloat32x3(value, value, value);
+}
+
+/// Broadcast a scalar value to a 4-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 4-dimensional floating point vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup GLSL
+FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
+{
+    return FfxFloat32x4(value, value, value, value);
+}
+
+/// Broadcast a scalar value to a 2-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 2-dimensional signed integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup GLSL
+FfxInt32x2 ffxBroadcast2(FfxInt32 value)
+{
+    return FfxInt32x2(value, value);
+}
+
+/// Broadcast a scalar value to a 3-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 3-dimensional signed integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup GLSL
+FfxInt32x3 ffxBroadcast3(FfxInt32 value)
+{
+    return FfxInt32x3(value, value, value);
+}
+
+/// Broadcast a scalar value to a 4-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 4-dimensional signed integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup GLSL
+FfxInt32x4 ffxBroadcast4(FfxInt32 value)
+{
+    return FfxInt32x4(value, value, value, value);
+}
+
+/// Broadcast a scalar value to a 2-dimensional unsigned integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 2-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup GLSL
+FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
+{
+    return FfxUInt32x2(value, value);
+}
+
+/// Broadcast a scalar value to a 3-dimensional unsigned integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 3-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup GLSL
+FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
+{
+    return FfxUInt32x3(value, value, value);
+}
+
+/// Broadcast a scalar value to a 4-dimensional unsigned integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 4-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup GLSL
+FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
+{
+    return FfxUInt32x4(value, value, value, value);
+}
+
+/// Overload of <c><i>bitfieldExtract</i></c> taking an unsigned offset and bit count; both are cast with FfxInt32().
+/// Forwards to the GLSL built-in, which extracts <c><i>bits</i></c> bits of <c><i>src</i></c> starting at bit <c><i>off</i></c>.
+/// @ingroup GLSL
+FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
+{
+    return bitfieldExtract(src, FfxInt32(off), FfxInt32(bits));
+}
+
+/// Merge two values under a mask: bits set in <c><i>mask</i></c> come from <c><i>ins</i></c>, all others from <c><i>src</i></c>.
+/// NOTE: This 3-argument form takes a mask, unlike the 4-argument GLSL built-in (offset and bit count).
+/// @ingroup GLSL
+FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
+{
+    return (ins & mask) | (src & (~mask));
+}
+
+/// Proxy for V_BFI_B32 where the 'mask' is set as 'bits', 'mask=(1<<bits)-1', and 'bits' needs to be an immediate.
+///
+/// Calls the 4-argument GLSL <c><i>bitfieldInsert</i></c> built-in with offset 0 and a bit count of <c><i>bits</i></c>.
+/// @ingroup GLSL
+FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
+{
+    return bitfieldInsert(src, ins, 0, FfxInt32(bits));
+}
+
+/// Compute the linear interpolation between two values.
+///
+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
+{
+    return mix(x, y, t);
+}
+
+/// Compute the linear interpolation between two 2-component vectors using a single scalar weight.
+///
+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The scalar weight, shared by every component, selecting between <c><i>x</i></c> and <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
+{
+    return mix(x, y, t);
+}
+
+/// Compute the linear interpolation between two 2-component vectors using a per-component weight.
+///
+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The per-component weights selecting between <c><i>x</i></c> and <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
+{
+    return mix(x, y, t);
+}
+
+/// Compute the linear interpolation between two 3-component vectors using a single scalar weight.
+///
+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The scalar weight, shared by every component, selecting between <c><i>x</i></c> and <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
+{
+    return mix(x, y, t);
+}
+
+/// Compute the linear interpolation between two 3-component vectors using a per-component weight.
+///
+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The per-component weights selecting between <c><i>x</i></c> and <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
+{
+    return mix(x, y, t);
+}
+
+/// Compute the linear interpolation between two 4-component vectors using a single scalar weight.
+///
+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The scalar weight, shared by every component, selecting between <c><i>x</i></c> and <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
+{
+    return mix(x, y, t);
+}
+
+/// Compute the linear interpolation between two 4-component vectors using a per-component weight.
+///
+/// Implemented by calling the GLSL <c><i>mix</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The per-component weights selecting between <c><i>x</i></c> and <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
+{
+    return mix(x, y, t);
+}
+
+/// Compute the maximum of three values as <c><i>max(x, max(y, z))</i></c>.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on
+/// GCN or RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the component-wise maximum of three 2-component vectors as <c><i>max(x, max(y, z))</i></c>.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation per component on
+/// GCN or RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the component-wise maximum of three 3-component vectors as <c><i>max(x, max(y, z))</i></c>.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation per component on
+/// GCN or RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the component-wise maximum of three 4-component vectors as <c><i>max(x, max(y, z))</i></c>.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation per component on
+/// GCN or RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the maximum of three unsigned integer values as <c><i>max(x, max(y, z))</i></c>.
+///
+/// NOTE: This function is intended to compile down to a single integer max3 operation (presumably
+/// <c><i>V_MAX3_U32</i></c>, not the floating-point <c><i>V_MAX3_F32</i></c>) on GCN or RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the component-wise maximum of three 2-component unsigned integer vectors.
+///
+/// NOTE: This function is intended to compile down to a single integer max3 operation per component (presumably
+/// <c><i>V_MAX3_U32</i></c>, not the floating-point <c><i>V_MAX3_F32</i></c>) on GCN or RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the component-wise maximum of three 3-component unsigned integer vectors.
+///
+/// NOTE: This function is intended to compile down to a single integer max3 operation per component (presumably
+/// <c><i>V_MAX3_U32</i></c>, not the floating-point <c><i>V_MAX3_F32</i></c>) on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the component-wise maximum of three 4-component unsigned integer vectors.
+///
+/// NOTE: This function is intended to compile down to a single integer max3 operation per component (presumably
+/// <c><i>V_MAX3_U32</i></c>, not the floating-point <c><i>V_MAX3_F32</i></c>) on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the median of three values as <c><i>max(min(x, y), min(max(x, y), z))</i></c>.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the component-wise median of three 2-component vectors.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation per component on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the component-wise median of three 3-component vectors.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation per component on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the component-wise median of three 4-component vectors.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation per component on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the median of three signed integer values as <c><i>max(min(x, y), min(max(x, y), z))</i></c>.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the component-wise median of three 2-component signed integer vectors.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation per component on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the component-wise median of three 3-component signed integer vectors.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation per component on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the component-wise median of three 4-component signed integer vectors.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation per component on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+
+/// Compute the minimum of three values as <c><i>min(x, min(y, z))</i></c>.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on
+/// GCN and RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three 2-component vectors as <c><i>min(x, min(y, z))</i></c>.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation per component on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three 3-component vectors as <c><i>min(x, min(y, z))</i></c>.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation per component on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three 4-component vectors as <c><i>min(x, min(y, z))</i></c>.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation per component on
+/// GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the minimum of three unsigned integer values as <c><i>min(x, min(y, z))</i></c>.
+///
+/// NOTE: This function is intended to compile down to a single integer min3 operation (presumably
+/// <c><i>V_MIN3_U32</i></c>, not the floating-point <c><i>V_MIN3_F32</i></c>) on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three 2-component unsigned integer vectors.
+///
+/// NOTE: This function is intended to compile down to a single integer min3 operation per component (presumably
+/// <c><i>V_MIN3_U32</i></c>, not the floating-point <c><i>V_MIN3_F32</i></c>) on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three 3-component unsigned integer vectors.
+///
+/// NOTE: This function is intended to compile down to a single integer min3 operation per component (presumably
+/// <c><i>V_MIN3_U32</i></c>, not the floating-point <c><i>V_MIN3_F32</i></c>) on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the component-wise minimum of three 4-component unsigned integer vectors.
+///
+/// NOTE: This function is intended to compile down to a single integer min3 operation per component (presumably
+/// <c><i>V_MIN3_U32</i></c>, not the floating-point <c><i>V_MIN3_F32</i></c>) on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup GLSL
+FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the reciprocal of a value as <c><i>1.0 / x</i></c>.
+///
+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
+///
+/// @param [in] x               The value to compute the reciprocal for.
+///
+/// @returns
+/// The reciprocal value of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32 rcp(FfxFloat32 x)
+{
+    return FfxFloat32(1.0) / x;
+}
+
+/// Compute the reciprocal of each component of a 2-component vector by dividing a broadcast 1.0 by <c><i>x</i></c>.
+///
+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
+///
+/// @param [in] x               The value to compute the reciprocal for.
+///
+/// @returns
+/// The reciprocal value of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x2 rcp(FfxFloat32x2 x)
+{
+    return ffxBroadcast2(1.0) / x;
+}
+
+/// Compute the reciprocal of each component of a 3-component vector by dividing a broadcast 1.0 by <c><i>x</i></c>.
+///
+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
+///
+/// @param [in] x               The value to compute the reciprocal for.
+///
+/// @returns
+/// The reciprocal value of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x3 rcp(FfxFloat32x3 x)
+{
+    return ffxBroadcast3(1.0) / x;
+}
+
+/// Compute the reciprocal of each component of a 4-component vector by dividing a broadcast 1.0 by <c><i>x</i></c>.
+///
+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rcp</i></c> can be used.
+///
+/// @param [in] x               The value to compute the reciprocal for.
+///
+/// @returns
+/// The reciprocal value of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x4 rcp(FfxFloat32x4 x)
+{
+    return ffxBroadcast4(1.0) / x;
+}
+
+/// Compute the reciprocal square root of a value as <c><i>1.0 / ffxSqrt(x)</i></c>.
+///
+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
+///
+/// @param [in] x               The value to compute the reciprocal square root for.
+///
+/// @returns
+/// The reciprocal square root value of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32 rsqrt(FfxFloat32 x)
+{
+    return FfxFloat32(1.0) / ffxSqrt(x);
+}
+
+/// Compute the reciprocal square root of a 2-component vector by dividing a broadcast 1.0 by <c><i>ffxSqrt(x)</i></c>.
+///
+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
+///
+/// @param [in] x               The value to compute the reciprocal square root for.
+///
+/// @returns
+/// The reciprocal square root value of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x2 rsqrt(FfxFloat32x2 x)
+{
+    return ffxBroadcast2(1.0) / ffxSqrt(x);
+}
+
+/// Compute the reciprocal square root of a 3-component vector by dividing a broadcast 1.0 by <c><i>ffxSqrt(x)</i></c>.
+///
+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
+///
+/// @param [in] x               The value to compute the reciprocal square root for.
+///
+/// @returns
+/// The reciprocal square root value of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x3 rsqrt(FfxFloat32x3 x)
+{
+    return ffxBroadcast3(1.0) / ffxSqrt(x);
+}
+
+/// Compute the reciprocal square root of a 4-component vector by dividing a broadcast 1.0 by <c><i>ffxSqrt(x)</i></c>.
+///
+/// NOTE: This function is only provided for GLSL. In HLSL the intrinsic function <c><i>rsqrt</i></c> can be used.
+///
+/// @param [in] x               The value to compute the reciprocal square root for.
+///
+/// @returns
+/// The reciprocal square root value of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x4 rsqrt(FfxFloat32x4 x)
+{
+    return ffxBroadcast4(1.0) / ffxSqrt(x);
+}
+
+/// Clamp a value to a [0..1] range using the GLSL <c><i>clamp</i></c> built-in.
+///
+/// @param [in] x               The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32 ffxSaturate(FfxFloat32 x)
+{
+    return clamp(x, FfxFloat32(0.0), FfxFloat32(1.0));
+}
+
+/// Clamp each component of a 2-component vector to a [0..1] range using the GLSL <c><i>clamp</i></c> built-in.
+///
+/// @param [in] x               The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
+{
+    return clamp(x, ffxBroadcast2(0.0), ffxBroadcast2(1.0));
+}
+
+/// Clamp each component of a 3-component vector to a [0..1] range using the GLSL <c><i>clamp</i></c> built-in.
+///
+/// @param [in] x               The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
+{
+    return clamp(x, ffxBroadcast3(0.0), ffxBroadcast3(1.0));
+}
+
+/// Clamp each component of a 4-component vector to a [0..1] range using the GLSL <c><i>clamp</i></c> built-in.
+///
+/// @param [in] x               The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
+{
+    return clamp(x, ffxBroadcast4(0.0), ffxBroadcast4(1.0));
+}
+
+/// Compute the fractional part of a decimal value.
+///
+/// This function calculates <c><i>x - floor(x)</i></c> by calling the GLSL <c><i>fract</i></c> built-in function.
+///
+/// NOTE: This function is intended to compile down to a single fractional-part operation (presumably
+/// <c><i>V_FRACT_F32</i></c>) on GCN/RDNA hardware. It is worth further noting that this function is
+/// intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic function.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32 ffxFract(FfxFloat32 x)
+{
+    return fract(x);
+}
+
+/// Compute the fractional part of each component of a two-component floating-point value.
+///
+/// This GLSL implementation forwards to the built-in <c><i>fract</i></c>, which yields <c><i>x - floor(x)</i></c>.
+///
+/// NOTE(review): the original note claims this compiles down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA
+/// hardware and is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic; the instruction name looks
+/// copied from a max3 helper -- confirm both claims before relying on them.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x2 ffxFract(FfxFloat32x2 x)
+{
+    return fract(x);
+}
+
+/// Compute the fractional part of each component of a three-component floating-point value.
+///
+/// This GLSL implementation forwards to the built-in <c><i>fract</i></c>, which yields <c><i>x - floor(x)</i></c>.
+///
+/// NOTE(review): the original note claims this compiles down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA
+/// hardware and is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic; the instruction name looks
+/// copied from a max3 helper -- confirm both claims before relying on them.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x3 ffxFract(FfxFloat32x3 x)
+{
+    return fract(x);
+}
+
+/// Compute the fractional part of each component of a four-component floating-point value.
+///
+/// This GLSL implementation forwards to the built-in <c><i>fract</i></c>, which yields <c><i>x - floor(x)</i></c>.
+///
+/// NOTE(review): the original note claims this compiles down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA
+/// hardware and is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic; the instruction name looks
+/// copied from a max3 helper -- confirm both claims before relying on them.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup GLSL
+FfxFloat32x4 ffxFract(FfxFloat32x4 x)
+{
+    return fract(x);
+}
+
+FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)  // Right shift of 'a' by 'b', carried out on signed conversions.
+{
+    return FfxUInt32(FfxInt32(a) >> FfxInt32(b));  // Both operands go through FfxInt32; the result is converted back.
+}
+
+#if FFX_HALF
+
+#define FFX_UINT32_TO_FLOAT16X2(x) unpackFloat2x16(FfxUInt32(x))
+
+FfxFloat16x4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x)
+{
+    return FfxFloat16x4(unpackFloat2x16(x.x), unpackFloat2x16(x.y));
+}
+#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))
+#define FFX_UINT32_TO_UINT16X2(x) unpackUint2x16(FfxUInt32(x))
+#define FFX_UINT32X2_TO_UINT16X4(x) unpackUint4x16(pack64(FfxUInt32x2(x)))
+//------------------------------------------------------------------------------------------------------------------------------
+#define FFX_FLOAT16X2_TO_UINT32(x) packFloat2x16(FfxFloat16x2(x))
+FfxUInt32x2 ffxFloat16x4ToUint32x2(FfxFloat16x4 x)
+{
+    return FfxUInt32x2(packFloat2x16(x.xy), packFloat2x16(x.zw));
+}
+#define FFX_FLOAT16X4_TO_UINT32X2(x) ffxFloat16x4ToUint32x2(FfxFloat16x4(x))
+#define FFX_UINT16X2_TO_UINT32(x) packUint2x16(FfxUInt16x2(x))
+#define FFX_UINT16X4_TO_UINT32X2(x) unpack32(packUint4x16(FfxUInt16x4(x)))
+//==============================================================================================================================
+#define FFX_TO_UINT16(x) halfBitsToUint16(FfxFloat16(x))
+#define FFX_TO_UINT16X2(x) halfBitsToUint16(FfxFloat16x2(x))
+#define FFX_TO_UINT16X3(x) halfBitsToUint16(FfxFloat16x3(x))
+#define FFX_TO_UINT16X4(x) halfBitsToUint16(FfxFloat16x4(x))
+//------------------------------------------------------------------------------------------------------------------------------
+#define FFX_TO_FLOAT16(x) uint16BitsToHalf(FfxUInt16(x))
+#define FFX_TO_FLOAT16X2(x) uint16BitsToHalf(FfxUInt16x2(x))
+#define FFX_TO_FLOAT16X3(x) uint16BitsToHalf(FfxUInt16x3(x))
+#define FFX_TO_FLOAT16X4(x) uint16BitsToHalf(FfxUInt16x4(x))
+//==============================================================================================================================
+FfxFloat16 ffxBroadcastFloat16(FfxFloat16 a)
+{
+    return FfxFloat16(a);
+}
+FfxFloat16x2 ffxBroadcastFloat16x2(FfxFloat16 a)
+{
+    return FfxFloat16x2(a, a);
+}
+FfxFloat16x3 ffxBroadcastFloat16x3(FfxFloat16 a)
+{
+    return FfxFloat16x3(a, a, a);
+}
+FfxFloat16x4 ffxBroadcastFloat16x4(FfxFloat16 a)
+{
+    return FfxFloat16x4(a, a, a, a);
+}
+#define FFX_BROADCAST_FLOAT16(a)   FfxFloat16(a)
+#define FFX_BROADCAST_FLOAT16X2(a) FfxFloat16x2(FfxFloat16(a))
+#define FFX_BROADCAST_FLOAT16X3(a) FfxFloat16x3(FfxFloat16(a))
+#define FFX_BROADCAST_FLOAT16X4(a) FfxFloat16x4(FfxFloat16(a))
+//------------------------------------------------------------------------------------------------------------------------------
+FfxInt16 ffxBroadcastInt16(FfxInt16 a)
+{
+    return FfxInt16(a);
+}
+FfxInt16x2 ffxBroadcastInt16x2(FfxInt16 a)
+{
+    return FfxInt16x2(a, a);
+}
+FfxInt16x3 ffxBroadcastInt16x3(FfxInt16 a)
+{
+    return FfxInt16x3(a, a, a);
+}
+FfxInt16x4 ffxBroadcastInt16x4(FfxInt16 a)
+{
+    return FfxInt16x4(a, a, a, a);
+}
+#define FFX_BROADCAST_INT16(a)   FfxInt16(a)
+#define FFX_BROADCAST_INT16X2(a) FfxInt16x2(FfxInt16(a))
+#define FFX_BROADCAST_INT16X3(a) FfxInt16x3(FfxInt16(a))
+#define FFX_BROADCAST_INT16X4(a) FfxInt16x4(FfxInt16(a))
+//------------------------------------------------------------------------------------------------------------------------------
+FfxUInt16 ffxBroadcastUInt16(FfxUInt16 a)
+{
+    return FfxUInt16(a);
+}
+FfxUInt16x2 ffxBroadcastUInt16x2(FfxUInt16 a)
+{
+    return FfxUInt16x2(a, a);
+}
+FfxUInt16x3 ffxBroadcastUInt16x3(FfxUInt16 a)
+{
+    return FfxUInt16x3(a, a, a);
+}
+FfxUInt16x4 ffxBroadcastUInt16x4(FfxUInt16 a)
+{
+    return FfxUInt16x4(a, a, a, a);
+}
+#define FFX_BROADCAST_UINT16(a)   FfxUInt16(a)
+#define FFX_BROADCAST_UINT16X2(a) FfxUInt16x2(FfxUInt16(a))
+#define FFX_BROADCAST_UINT16X3(a) FfxUInt16x3(FfxUInt16(a))
+#define FFX_BROADCAST_UINT16X4(a) FfxUInt16x4(FfxUInt16(a))
+//==============================================================================================================================
+FfxUInt16 ffxAbsHalf(FfxUInt16 a)
+{
+    return FfxUInt16(abs(FfxInt16(a)));
+}
+FfxUInt16x2 ffxAbsHalf(FfxUInt16x2 a)
+{
+    return FfxUInt16x2(abs(FfxInt16x2(a)));
+}
+FfxUInt16x3 ffxAbsHalf(FfxUInt16x3 a)
+{
+    return FfxUInt16x3(abs(FfxInt16x3(a)));
+}
+FfxUInt16x4 ffxAbsHalf(FfxUInt16x4 a)
+{
+    return FfxUInt16x4(abs(FfxInt16x4(a)));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FfxFloat16 ffxClampHalf(FfxFloat16 x, FfxFloat16 n, FfxFloat16 m)
+{
+    return clamp(x, n, m);
+}
+FfxFloat16x2 ffxClampHalf(FfxFloat16x2 x, FfxFloat16x2 n, FfxFloat16x2 m)
+{
+    return clamp(x, n, m);
+}
+FfxFloat16x3 ffxClampHalf(FfxFloat16x3 x, FfxFloat16x3 n, FfxFloat16x3 m)
+{
+    return clamp(x, n, m);
+}
+FfxFloat16x4 ffxClampHalf(FfxFloat16x4 x, FfxFloat16x4 n, FfxFloat16x4 m)
+{
+    return clamp(x, n, m);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FfxFloat16 ffxFract(FfxFloat16 x)
+{
+    return fract(x);
+}
+FfxFloat16x2 ffxFract(FfxFloat16x2 x)
+{
+    return fract(x);
+}
+FfxFloat16x3 ffxFract(FfxFloat16x3 x)
+{
+    return fract(x);
+}
+FfxFloat16x4 ffxFract(FfxFloat16x4 x)
+{
+    return fract(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FfxFloat16 ffxLerp(FfxFloat16 x, FfxFloat16 y, FfxFloat16 a)
+{
+    return mix(x, y, a);
+}
+FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16 a)
+{
+    return mix(x, y, a);
+}
+FfxFloat16x2 ffxLerp(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 a)
+{
+    return mix(x, y, a);
+}
+FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 a)
+{
+    return mix(x, y, a);
+}
+FfxFloat16x3 ffxLerp(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16 a)
+{
+    return mix(x, y, a);
+}
+FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16 a)
+{
+    return mix(x, y, a);
+}
+FfxFloat16x4 ffxLerp(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 a)
+{
+    return mix(x, y, a);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// No packed version of ffxMed3.
+FfxFloat16 ffxMed3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FfxFloat16x2 ffxMed3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FfxFloat16x3 ffxMed3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FfxFloat16x4 ffxMed3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FfxInt16 ffxMed3Half(FfxInt16 x, FfxInt16 y, FfxInt16 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FfxInt16x2 ffxMed3Half(FfxInt16x2 x, FfxInt16x2 y, FfxInt16x2 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FfxInt16x3 ffxMed3Half(FfxInt16x3 x, FfxInt16x3 y, FfxInt16x3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FfxInt16x4 ffxMed3Half(FfxInt16x4 x, FfxInt16x4 y, FfxInt16x4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// No packed version of ffxMax3.
+FfxFloat16 ffxMax3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
+{
+    return max(x, max(y, z));
+}
+FfxFloat16x2 ffxMax3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
+{
+    return max(x, max(y, z));
+}
+FfxFloat16x3 ffxMax3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
+{
+    return max(x, max(y, z));
+}
+FfxFloat16x4 ffxMax3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
+{
+    return max(x, max(y, z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// No packed version of ffxMin3.
+FfxFloat16 ffxMin3Half(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
+{
+    return min(x, min(y, z));
+}
+FfxFloat16x2 ffxMin3Half(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
+{
+    return min(x, min(y, z));
+}
+FfxFloat16x3 ffxMin3Half(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
+{
+    return min(x, min(y, z));
+}
+FfxFloat16x4 ffxMin3Half(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
+{
+    return min(x, min(y, z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FfxFloat16 ffxReciprocalHalf(FfxFloat16 x)
+{
+    return FFX_BROADCAST_FLOAT16(1.0) / x;
+}
+FfxFloat16x2 ffxReciprocalHalf(FfxFloat16x2 x)
+{
+    return FFX_BROADCAST_FLOAT16X2(1.0) / x;
+}
+FfxFloat16x3 ffxReciprocalHalf(FfxFloat16x3 x)
+{
+    return FFX_BROADCAST_FLOAT16X3(1.0) / x;
+}
+FfxFloat16x4 ffxReciprocalHalf(FfxFloat16x4 x)
+{
+    return FFX_BROADCAST_FLOAT16X4(1.0) / x;
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FfxFloat16 ffxReciprocalSquareRootHalf(FfxFloat16 x)
+{
+    return FFX_BROADCAST_FLOAT16(1.0) / sqrt(x);
+}
+FfxFloat16x2 ffxReciprocalSquareRootHalf(FfxFloat16x2 x)
+{
+    return FFX_BROADCAST_FLOAT16X2(1.0) / sqrt(x);
+}
+FfxFloat16x3 ffxReciprocalSquareRootHalf(FfxFloat16x3 x)
+{
+    return FFX_BROADCAST_FLOAT16X3(1.0) / sqrt(x);
+}
+FfxFloat16x4 ffxReciprocalSquareRootHalf(FfxFloat16x4 x)
+{
+    return FFX_BROADCAST_FLOAT16X4(1.0) / sqrt(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FfxFloat16 ffxSaturate(FfxFloat16 x)
+{
+    return clamp(x, FFX_BROADCAST_FLOAT16(0.0), FFX_BROADCAST_FLOAT16(1.0));
+}
+FfxFloat16x2 ffxSaturate(FfxFloat16x2 x)
+{
+    return clamp(x, FFX_BROADCAST_FLOAT16X2(0.0), FFX_BROADCAST_FLOAT16X2(1.0));
+}
+FfxFloat16x3 ffxSaturate(FfxFloat16x3 x)
+{
+    return clamp(x, FFX_BROADCAST_FLOAT16X3(0.0), FFX_BROADCAST_FLOAT16X3(1.0));
+}
+FfxFloat16x4 ffxSaturate(FfxFloat16x4 x)
+{
+    return clamp(x, FFX_BROADCAST_FLOAT16X4(0.0), FFX_BROADCAST_FLOAT16X4(1.0));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FfxUInt16 ffxBitShiftRightHalf(FfxUInt16 a, FfxUInt16 b)
+{
+    return FfxUInt16(FfxInt16(a) >> FfxInt16(b));
+}
+FfxUInt16x2 ffxBitShiftRightHalf(FfxUInt16x2 a, FfxUInt16x2 b)
+{
+    return FfxUInt16x2(FfxInt16x2(a) >> FfxInt16x2(b));
+}
+FfxUInt16x3 ffxBitShiftRightHalf(FfxUInt16x3 a, FfxUInt16x3 b)
+{
+    return FfxUInt16x3(FfxInt16x3(a) >> FfxInt16x3(b));
+}
+FfxUInt16x4 ffxBitShiftRightHalf(FfxUInt16x4 a, FfxUInt16x4 b)
+{
+    return FfxUInt16x4(FfxInt16x4(a) >> FfxInt16x4(b));
+}
+#endif // FFX_HALF
+
+#if defined(FFX_WAVE)
+// Where 'x' must be a compile time literal.
+FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
+{
+    return subgroupShuffleXor(v, x);
+}
+FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
+{
+    return subgroupShuffleXor(v, x);
+}
+FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
+{
+    return subgroupShuffleXor(v, x);
+}
+FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
+{
+    return subgroupShuffleXor(v, x);
+}
+FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x)
+{
+    return subgroupShuffleXor(v, x);
+}
+FfxUInt32x2 AWaveXorU2(FfxUInt32x2 v, FfxUInt32 x)
+{
+    return subgroupShuffleXor(v, x);
+}
+FfxUInt32x3 AWaveXorU3(FfxUInt32x3 v, FfxUInt32 x)
+{
+    return subgroupShuffleXor(v, x);
+}
+FfxUInt32x4 AWaveXorU4(FfxUInt32x4 v, FfxUInt32 x)
+{
+    return subgroupShuffleXor(v, x);
+}
+
+//------------------------------------------------------------------------------------------------------------------------------
+#if FFX_HALF
+FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x)
+{
+    return FFX_UINT32_TO_FLOAT16X2(subgroupShuffleXor(FFX_FLOAT16X2_TO_UINT32(v), x));
+}
+FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x)
+{
+    return FFX_UINT32X2_TO_FLOAT16X4(subgroupShuffleXor(FFX_FLOAT16X4_TO_UINT32X2(v), x));
+}
+FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x)
+{
+    return FFX_UINT32_TO_UINT16X2(subgroupShuffleXor(FFX_UINT16X2_TO_UINT32(v), x));
+}
+FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x)
+{
+    return FFX_UINT32X2_TO_UINT16X4(subgroupShuffleXor(FFX_UINT16X4_TO_UINT32X2(v), x));
+}
+#endif // FFX_HALF
+#endif // #if defined(FFX_WAVE)
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h b/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h
new file mode 100644
index 0000000000..ae07642f0d
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common.h
@@ -0,0 +1,2784 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+/// A define for a true value in a boolean expression.
+///
+/// @ingroup GPU
+#define FFX_TRUE (true)
+
+/// A define for a false value in a boolean expression.
+///
+/// @ingroup GPU
+#define FFX_FALSE (false)
+
+/// A define value for positive infinity.
+///
+/// @ingroup GPU
+#define FFX_POSITIVE_INFINITY_FLOAT ffxAsFloat(0x7f800000u)
+
+/// A define value for negative infinity.
+///
+/// @ingroup GPU
+#define FFX_NEGATIVE_INFINITY_FLOAT ffxAsFloat(0xff800000u)
+
+/// A define value for PI.
+/// 
+/// @ingroup GPU
+#define FFX_PI  (3.14159)
+
+
+/// Compute the reciprocal of <c><i>value</i></c>.
+///
+/// @param [in] value               The value to compute the reciprocal of.
+///
+/// @returns
+/// The 1 / <c><i>value</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32 ffxReciprocal(FfxFloat32 value)
+{
+    return rcp(value);
+}
+
+/// Compute the reciprocal of <c><i>value</i></c>.
+///
+/// @param [in] value               The value to compute the reciprocal of.
+///
+/// @returns
+/// The 1 / <c><i>value</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxReciprocal(FfxFloat32x2 value)
+{
+    return rcp(value);
+}
+
+/// Compute the reciprocal of <c><i>value</i></c>.
+///
+/// @param [in] value               The value to compute the reciprocal of.
+///
+/// @returns
+/// The 1 / <c><i>value</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxReciprocal(FfxFloat32x3 value)
+{
+    return rcp(value);
+}
+
+/// Compute the reciprocal of <c><i>value</i></c>.
+///
+/// @param [in] value               The value to compute the reciprocal of.
+///
+/// @returns
+/// The 1 / <c><i>value</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxReciprocal(FfxFloat32x4 value)
+{
+    return rcp(value);
+}
+
+/// Compute the min of two FfxFloat32 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxFloat32 ffxMin(FfxFloat32 x, FfxFloat32 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two FfxFloat32x2 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxMin(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two FfxFloat32x3 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxMin(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two FfxFloat32x4 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxMin(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two FfxInt32 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxInt32 ffxMin(FfxInt32 x, FfxInt32 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two FfxInt32x2 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxInt32x2 ffxMin(FfxInt32x2 x, FfxInt32x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two FfxInt32x3 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxInt32x3 ffxMin(FfxInt32x3 x, FfxInt32x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two FfxInt32x4 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxInt32x4 ffxMin(FfxInt32x4 x, FfxInt32x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two FfxUInt32 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxUInt32 ffxMin(FfxUInt32 x, FfxUInt32 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two FfxUInt32x2 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxMin(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two FfxUInt32x3 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxUInt32x3 ffxMin(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two FfxUInt32x4 values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxUInt32x4 ffxMin(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the max of two FfxFloat32 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxFloat32 ffxMax(FfxFloat32 x, FfxFloat32 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two FfxFloat32x2 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxMax(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two FfxFloat32x3 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxMax(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two FfxFloat32x4 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxMax(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two FfxInt32 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxInt32 ffxMax(FfxInt32 x, FfxInt32 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two FfxInt32x2 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxInt32x2 ffxMax(FfxInt32x2 x, FfxInt32x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two FfxInt32x3 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxInt32x3 ffxMax(FfxInt32x3 x, FfxInt32x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two FfxInt32x4 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxInt32x4 ffxMax(FfxInt32x4 x, FfxInt32x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two FfxUInt32 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxUInt32 ffxMax(FfxUInt32 x, FfxUInt32 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two FfxUInt32x2 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxMax(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two FfxUInt32x3 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxUInt32x3 ffxMax(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two FfxUInt32x4 values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxUInt32x4 ffxMax(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second.
+///
+/// @ingroup GPU
+FfxFloat32 ffxPow(FfxFloat32 x, FfxFloat32 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxPow(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxPow(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxPow(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the square root of an FfxFloat32 value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32 ffxSqrt(FfxFloat32 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the square root of an FfxFloat32x2 value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxSqrt(FfxFloat32x2 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the square root of an FfxFloat32x3 value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxSqrt(FfxFloat32x3 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the square root of an FfxFloat32x4 value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxSqrt(FfxFloat32x4 x)
+{
+    return sqrt(x);
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into.
+/// @param [in] s                   The value to copy the sign bit from.
+/// 
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+/// 
+/// @ingroup GPU
+FfxFloat32 ffxCopySignBit(FfxFloat32 d, FfxFloat32 s)
+{
+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & FfxUInt32(0x80000000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into.
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxCopySignBit(FfxFloat32x2 d, FfxFloat32x2 s)
+{
+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast2(0x80000000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into.
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxCopySignBit(FfxFloat32x3 d, FfxFloat32x3 s)
+{
+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast3(0x80000000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into.
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxCopySignBit(FfxFloat32x4 d, FfxFloat32x4 s)
+{
+    return ffxAsFloat(ffxAsUInt32(d) | (ffxAsUInt32(s) & ffxBroadcast4(0x80000000u)));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPU
+FfxFloat32 ffxIsSigned(FfxFloat32 m)
+{
+    return ffxSaturate(m * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation (saturate of m multiplied by negative infinity) to return the following (FfxFloat32x2 overload):
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic. NOTE(review): the NaN rows assume ffxSaturate() maps NaN to 0 -- confirm for the target shading language.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxIsSigned(FfxFloat32x2 m)
+{
+    return ffxSaturate(m * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation (saturate of m multiplied by negative infinity) to return the following (FfxFloat32x3 overload):
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic. NOTE(review): the NaN rows assume ffxSaturate() maps NaN to 0 -- confirm for the target shading language.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxIsSigned(FfxFloat32x3 m)
+{
+    return ffxSaturate(m * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation (saturate of m multiplied by negative infinity) to return the following (FfxFloat32x4 overload):
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic. NOTE(review): the NaN rows assume ffxSaturate() maps NaN to 0 -- confirm for the target shading language.
+///
+/// @param [in] m                       The value to test for having the sign set.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxIsSigned(FfxFloat32x4 m)
+{
+    return ffxSaturate(m * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT));
+}
+
+/// A single operation (saturate of m multiplied by positive infinity) to return the following:
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic. NOTE(review): the m = 0 (0*INF is NaN) and m = NaN rows assume ffxSaturate() maps NaN to 0 -- confirm for the target shading language.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPU
+FfxFloat32 ffxIsGreaterThanZero(FfxFloat32 m)
+{
+    return ffxSaturate(m * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// A single operation (saturate of m multiplied by positive infinity) to return the following (FfxFloat32x2 overload):
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic. NOTE(review): the m = 0 (0*INF is NaN) and m = NaN rows assume ffxSaturate() maps NaN to 0 -- confirm for the target shading language.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxIsGreaterThanZero(FfxFloat32x2 m)
+{
+    return ffxSaturate(m * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// A single operation (saturate of m multiplied by positive infinity) to return the following (FfxFloat32x3 overload):
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic. NOTE(review): the m = 0 (0*INF is NaN) and m = NaN rows assume ffxSaturate() maps NaN to 0 -- confirm for the target shading language.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxIsGreaterThanZero(FfxFloat32x3 m)
+{
+    return ffxSaturate(m * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// A single operation (saturate of m multiplied by positive infinity) to return the following (FfxFloat32x4 overload):
+///     m = NaN := 0
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic. NOTE(review): the m = 0 (0*INF is NaN) and m = NaN rows assume ffxSaturate() maps NaN to 0 -- confirm for the target shading language.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxIsGreaterThanZero(FfxFloat32x4 m)
+{
+    return ffxSaturate(m * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT));
+}
+
+/// Convert the bit pattern of a 32bit floating point value to a sortable integer.
+///
+///  - If sign bit=0, flip the sign bit (positives).
+///  - If sign bit=1, flip all bits     (negatives).
+///
+/// The function has the side effects that (assuming AShrSU1 is a sign-extending shift right -- TODO confirm):
+///  - Larger integers are more positive values.
+///  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+///
+/// @param [in] value                       The floating point value to make sortable, passed as its unsigned integer bit pattern.
+///
+/// @returns
+/// The sortable integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxFloatToSortableInteger(FfxUInt32 value)
+{
+    return value ^ ((AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000));
+}
+
+/// Convert a sortable integer back to the bit pattern of a 32bit floating point value.
+///
+/// The function has the effect that (assuming AShrSU1 is a sign-extending shift right -- TODO confirm):
+///  - If sign bit=1, flip the sign bit (positives).
+///  - If sign bit=0, flip all bits     (negatives).
+///
+/// @param [in] value                       The sortable integer value to convert back.
+///
+/// @returns
+/// The floating point value, returned as its unsigned integer bit pattern.
+///
+/// @ingroup GPU
+FfxUInt32 ffxSortableIntegerToFloat(FfxUInt32 value)
+{
+    return value ^ ((~AShrSU1(value, FfxUInt32(31))) | FfxUInt32(0x80000000));
+}
+
+/// Calculate a low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality by shifting the bits of 'a' right by one and adding 0x1fbc4639.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximateSqrt(FfxFloat32 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(1)) + FfxUInt32(0x1fbc4639));
+}
+
+/// Calculate a low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality by subtracting the bits of 'a' from 0x7ef07ebb.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximateReciprocal(FfxFloat32 a)
+{
+    return ffxAsFloat(FfxUInt32(0x7ef07ebb) - ffxAsUInt32(a));
+}
+
+/// Calculate a medium-quality approximation for the reciprocal of a value: a bit-level estimate b refined once as b * (2 - b * value).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] value           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximateReciprocalMedium(FfxFloat32 value)
+{
+    FfxFloat32 b = ffxAsFloat(FfxUInt32(0x7ef19fff) - ffxAsUInt32(value));
+    return b * (-b * value + FfxFloat32(2.0));
+}
+
+/// Calculate a low-quality approximation for the reciprocal square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the reciprocal square root for.
+///
+/// @returns
+/// An approximation of the reciprocal square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximateReciprocalSquareRoot(FfxFloat32 a)
+{
+    return ffxAsFloat(FfxUInt32(0x5f347d74) - (ffxAsUInt32(a) >> FfxUInt32(1)));
+}
+
+/// Calculate a low-quality approximation for the square root of a value (FfxFloat32x2 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximateSqrt(FfxFloat32x2 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(1u)) + ffxBroadcast2(0x1fbc4639u));
+}
+
+/// Calculate a low-quality approximation for the reciprocal of a value (FfxFloat32x2 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximateReciprocal(FfxFloat32x2 a)
+{
+    return ffxAsFloat(ffxBroadcast2(0x7ef07ebbu) - ffxAsUInt32(a));
+}
+
+/// Calculate a medium-quality approximation for the reciprocal of a value: a bit-level estimate b refined once as b * (2 - b * a) (FfxFloat32x2 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximateReciprocalMedium(FfxFloat32x2 a)
+{
+    FfxFloat32x2 b = ffxAsFloat(ffxBroadcast2(0x7ef19fffu) - ffxAsUInt32(a));
+    return b * (-b * a + ffxBroadcast2(2.0f));
+}
+
+/// Calculate a low-quality approximation for the reciprocal square root of a value (FfxFloat32x2 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the reciprocal square root for.
+///
+/// @returns
+/// An approximation of the reciprocal square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximateReciprocalSquareRoot(FfxFloat32x2 a)
+{
+    return ffxAsFloat(ffxBroadcast2(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast2(1u)));
+}
+
+/// Calculate a low-quality approximation for the square root of a value (FfxFloat32x3 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximateSqrt(FfxFloat32x3 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(1u)) + ffxBroadcast3(0x1fbc4639u));
+}
+
+/// Calculate a low-quality approximation for the reciprocal of a value (FfxFloat32x3 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximateReciprocal(FfxFloat32x3 a)
+{
+    return ffxAsFloat(ffxBroadcast3(0x7ef07ebbu) - ffxAsUInt32(a));
+}
+
+/// Calculate a medium-quality approximation for the reciprocal of a value: a bit-level estimate b refined once as b * (2 - b * a) (FfxFloat32x3 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximateReciprocalMedium(FfxFloat32x3 a)
+{
+    FfxFloat32x3 b = ffxAsFloat(ffxBroadcast3(0x7ef19fffu) - ffxAsUInt32(a));
+    return b * (-b * a + ffxBroadcast3(2.0f));
+}
+
+/// Calculate a low-quality approximation for the reciprocal square root of a value (FfxFloat32x3 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the reciprocal square root for.
+///
+/// @returns
+/// An approximation of the reciprocal square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximateReciprocalSquareRoot(FfxFloat32x3 a)
+{
+    return ffxAsFloat(ffxBroadcast3(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast3(1u)));
+}
+
+/// Calculate a low-quality approximation for the square root of a value (FfxFloat32x4 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximateSqrt(FfxFloat32x4 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(1u)) + ffxBroadcast4(0x1fbc4639u));
+}
+
+/// Calculate a low-quality approximation for the reciprocal of a value (FfxFloat32x4 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximateReciprocal(FfxFloat32x4 a)
+{
+    return ffxAsFloat(ffxBroadcast4(0x7ef07ebbu) - ffxAsUInt32(a));
+}
+
+/// Calculate a medium-quality approximation for the reciprocal of a value: a bit-level estimate b refined once as b * (2 - b * a) (FfxFloat32x4 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximateReciprocalMedium(FfxFloat32x4 a)
+{
+    FfxFloat32x4 b = ffxAsFloat(ffxBroadcast4(0x7ef19fffu) - ffxAsUInt32(a));
+    return b * (-b * a + ffxBroadcast4(2.0f));
+}
+
+/// Calculate a low-quality approximation for the reciprocal square root of a value (FfxFloat32x4 overload).
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a               The value to calculate an approximate to the reciprocal square root for.
+///
+/// @returns
+/// An approximation of the reciprocal square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximateReciprocalSquareRoot(FfxFloat32x4 a)
+{
+    return ffxAsFloat(ffxBroadcast4(0x5f347d74u) - (ffxAsUInt32(a) >> ffxBroadcast4(1u)));
+}
+
+/// Calculate the dot product of 'a' and 'b' by forwarding to dot().
+///
+/// @param [in] a                   First FfxFloat32x2 vector input.
+/// @param [in] b                   Second FfxFloat32x2 vector input.
+///
+/// @returns
+/// The value of <c><i>a</i></c> dot <c><i>b</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32 ffxDot2(FfxFloat32x2 a, FfxFloat32x2 b)
+{
+    return dot(a, b);
+}
+
+/// Calculate the dot product of 'a' and 'b' by forwarding to dot().
+///
+/// @param [in] a                   First FfxFloat32x3 vector input.
+/// @param [in] b                   Second FfxFloat32x3 vector input.
+///
+/// @returns
+/// The value of <c><i>a</i></c> dot <c><i>b</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32 ffxDot3(FfxFloat32x3 a, FfxFloat32x3 b)
+{
+    return dot(a, b);
+}
+
+/// Calculate the dot product of 'a' and 'b' by forwarding to dot().
+///
+/// @param [in] a                   First FfxFloat32x4 vector input.
+/// @param [in] b                   Second FfxFloat32x4 vector input.
+///
+/// @returns
+/// The value of <c><i>a</i></c> dot <c><i>b</i></c>.
+///
+/// @ingroup GPU
+FfxFloat32 ffxDot4(FfxFloat32x4 a, FfxFloat32x4 b)
+{
+    return dot(a, b);
+}
+
+
+/// Compute an approximate conversion from PQ to Gamma2 space.
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%. This function returns the 4th power, a * a * a * a.
+///
+/// @param [in] a               The PQ value to convert to Gamma2.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into Gamma2.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximatePQToGamma2Medium(FfxFloat32 a)
+{
+    return a * a * a * a;
+}
+
+/// Compute an approximate conversion from PQ to linear space.
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%. This function returns the 8th power of a by repeated multiplication.
+///
+/// @param [in] a               The PQ value to convert to linear.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into linear.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximatePQToLinear(FfxFloat32 a)
+{
+    return a * a * a * a * a * a * a * a;
+}
+
+/// Compute an approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%. This function shifts the bits of a right by 2 and adds 0x2F9A4E46.
+///
+/// @param [in] a               The gamma2 value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximateGamma2ToPQ(FfxFloat32 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46));
+}
+
+/// Compute a more accurate approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). This function refines the bit-level estimate b once as b - b * (b^4 - a) / (4 * b^4).
+///
+/// @param [in] a               The gamma2 value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximateGamma2ToPQMedium(FfxFloat32 a)
+{
+    FfxFloat32 b  = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(2)) + FfxUInt32(0x2F9A4E46));
+    FfxFloat32 b4 = b * b * b * b;
+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4);
+}
+
+/// Compute a high accuracy approximate conversion from gamma2 to PQ space.
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). This function instead takes the 4th root as ffxSqrt(ffxSqrt(a)).
+///
+/// @param [in] a               The gamma2 value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximateGamma2ToPQHigh(FfxFloat32 a)
+{
+    return ffxSqrt(ffxSqrt(a));
+}
+
+/// Compute an approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%. This function shifts the bits of a right by 3 and adds 0x378D8723.
+///
+/// @param [in] a               The linear value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximateLinearToPQ(FfxFloat32 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723));
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). This function refines the bit-level estimate b once as b - b * (b^8 - a) / (8 * b^8).
+///
+/// @param [in] a               The linear value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximateLinearToPQMedium(FfxFloat32 a)
+{
+    FfxFloat32 b  = ffxAsFloat((ffxAsUInt32(a) >> FfxUInt32(3)) + FfxUInt32(0x378D8723));
+    FfxFloat32 b8 = b * b * b * b * b * b * b * b;
+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8);
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space.
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). This function instead takes the 8th root as ffxSqrt(ffxSqrt(ffxSqrt(a))).
+///
+/// @param [in] a               The linear value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32 ffxApproximateLinearToPQHigh(FfxFloat32 a)
+{
+    return ffxSqrt(ffxSqrt(ffxSqrt(a)));
+}
+
+/// Compute an approximate conversion from PQ to Gamma2 space (FfxFloat32x2 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%. This function returns the 4th power, a * a * a * a.
+///
+/// @param [in] a               The PQ value to convert to Gamma2.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into Gamma2.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximatePQToGamma2Medium(FfxFloat32x2 a)
+{
+    return a * a * a * a;
+}
+
+/// Compute an approximate conversion from PQ to linear space (FfxFloat32x2 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%. This function returns the 8th power of a by repeated multiplication.
+///
+/// @param [in] a               The PQ value to convert to linear.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into linear.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximatePQToLinear(FfxFloat32x2 a)
+{
+    return a * a * a * a * a * a * a * a;
+}
+
+/// Compute an approximate conversion from gamma2 to PQ space (FfxFloat32x2 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%. This function shifts the bits of a right by 2 and adds 0x2F9A4E46.
+///
+/// @param [in] a               The gamma2 value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximateGamma2ToPQ(FfxFloat32x2 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u));
+}
+
+/// Compute a more accurate approximate conversion from gamma2 to PQ space (FfxFloat32x2 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). This function refines the bit-level estimate b once as b - b * (b^4 - a) / (4 * b^4).
+///
+/// @param [in] a               The gamma2 value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximateGamma2ToPQMedium(FfxFloat32x2 a)
+{
+    FfxFloat32x2 b  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(2u)) + ffxBroadcast2(0x2F9A4E46u));
+    FfxFloat32x2 b4 = b * b * b * b;
+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4);
+}
+
+/// Compute a high accuracy approximate conversion from gamma2 to PQ space (FfxFloat32x2 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). This function instead takes the 4th root as ffxSqrt(ffxSqrt(a)).
+///
+/// @param [in] a               The gamma2 value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximateGamma2ToPQHigh(FfxFloat32x2 a)
+{
+    return ffxSqrt(ffxSqrt(a));
+}
+
+/// Compute an approximate conversion from linear to PQ space (FfxFloat32x2 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%. This function shifts the bits of a right by 3 and adds 0x378D8723.
+///
+/// @param [in] a               The linear value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximateLinearToPQ(FfxFloat32x2 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u));
+}
+
+/// Compute a more accurate approximate conversion from linear to PQ space (FfxFloat32x2 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). This function refines the bit-level estimate b once as b - b * (b^8 - a) / (8 * b^8).
+///
+/// @param [in] a               The linear value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximateLinearToPQMedium(FfxFloat32x2 a)
+{
+    FfxFloat32x2 b  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast2(3u)) + ffxBroadcast2(0x378D8723u));
+    FfxFloat32x2 b8 = b * b * b * b * b * b * b * b;
+    return b - b * (b8 - a) / (FfxFloat32(8.0) * b8);
+}
+
+/// Compute a very accurate approximate conversion from linear to PQ space (FfxFloat32x2 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). This function instead takes the 8th root as ffxSqrt(ffxSqrt(ffxSqrt(a))).
+///
+/// @param [in] a               The linear value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxApproximateLinearToPQHigh(FfxFloat32x2 a)
+{
+    return ffxSqrt(ffxSqrt(ffxSqrt(a)));
+}
+
+/// Compute an approximate conversion from PQ to Gamma2 space (FfxFloat32x3 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%. This function returns the 4th power, a * a * a * a.
+///
+/// @param [in] a               The PQ value to convert to Gamma2.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into Gamma2.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximatePQToGamma2Medium(FfxFloat32x3 a)
+{
+    return a * a * a * a;
+}
+
+/// Compute an approximate conversion from PQ to linear space (FfxFloat32x3 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%. This function returns the 8th power of a by repeated multiplication.
+///
+/// @param [in] a               The PQ value to convert to linear.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into linear.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximatePQToLinear(FfxFloat32x3 a)
+{
+    return a * a * a * a * a * a * a * a;
+}
+
+/// Compute an approximate conversion from gamma2 to PQ space (FfxFloat32x3 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). The maximum error is approximately 0.2%. This function shifts the bits of a right by 2 and adds 0x2F9A4E46.
+///
+/// @param [in] a               The gamma2 value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximateGamma2ToPQ(FfxFloat32x3 a)
+{
+    return ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u));
+}
+
+/// Compute a more accurate approximate conversion from gamma2 to PQ space (FfxFloat32x3 overload).
+///
+/// PQ is very close to x^(1/8). This family of functions uses the fast FfxFloat32 approximation method to do
+/// PQ conversions to and from Gamma2 (4th power and fast 4th root), and PQ to and from Linear
+/// (8th power and fast 8th root). This function refines the bit-level estimate b once as b - b * (b^4 - a) / (4 * b^4).
+///
+/// @param [in] a               The gamma2 value to convert to PQ.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximateGamma2ToPQMedium(FfxFloat32x3 a)
+{
+    FfxFloat32x3 b  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(2u)) + ffxBroadcast3(0x2F9A4E46u));
+    FfxFloat32x3 b4 = b * b * b * b;
+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4);
+}
+
+/// High-accuracy Gamma2-to-PQ conversion for a three-component value.
+///
+/// PQ is modelled as x^(1/8), so Gamma2 to PQ is a 4th root, computed here as two nested
+/// square roots.
+///
+/// @param a                    The Gamma2 value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ, per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximateGamma2ToPQHigh(FfxFloat32x3 a)
+{
+    FfxFloat32x3 squareRoot = ffxSqrt(a);
+    return ffxSqrt(squareRoot);
+}
+
+/// Fast approximate linear-to-PQ conversion for a three-component value.
+///
+/// PQ is modelled as x^(1/8), so linear to PQ is an 8th root. The root is approximated purely with
+/// integer operations on the float bit pattern: shift right by three, then add a bias constant.
+///
+/// @param a                    The linear value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ (approximate 8th root), per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximateLinearToPQ(FfxFloat32x3 a)
+{
+    FfxUInt32x3 bits = ffxAsUInt32(a) >> ffxBroadcast3(3u);
+    bits = bits + ffxBroadcast3(0x378D8723u);
+    return ffxAsFloat(bits);
+}
+
+/// Medium-accuracy approximate linear-to-PQ conversion for a three-component value.
+///
+/// Starts from the same bit-pattern estimate of the 8th root as
+/// <c><i>ffxApproximateLinearToPQ</i></c> and refines it with one correction step of the form
+/// b - b * (b^8 - a) / (8 * b^8).
+///
+/// @param a                    The linear value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ (refined approximate 8th root), per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximateLinearToPQMedium(FfxFloat32x3 a)
+{
+    // Initial estimate from the integer bit trick.
+    FfxFloat32x3 estimate = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast3(3u)) + ffxBroadcast3(0x378D8723u));
+    FfxFloat32x3 estimatePow8 = estimate * estimate * estimate * estimate * estimate * estimate * estimate * estimate;
+
+    // One refinement step towards the 8th root of a.
+    FfxFloat32x3 correction = estimate * (estimatePow8 - a) / (FfxFloat32(8.0) * estimatePow8);
+    return estimate - correction;
+}
+
+/// High-accuracy linear-to-PQ conversion for a three-component value.
+///
+/// PQ is modelled as x^(1/8), so linear to PQ is an 8th root, computed here as three nested
+/// square roots.
+///
+/// @param a                    The linear value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ, per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxApproximateLinearToPQHigh(FfxFloat32x3 a)
+{
+    FfxFloat32x3 root = ffxSqrt(a);  // a^(1/2)
+    root = ffxSqrt(root);            // a^(1/4)
+    return ffxSqrt(root);            // a^(1/8)
+}
+
+/// Approximate PQ-to-Gamma2 conversion for a four-component value.
+///
+/// PQ is modelled as x^(1/8), so PQ to Gamma2 is the 4th power, evaluated as a chain of three
+/// multiplies by <c><i>a</i></c>.
+///
+/// @param a                    The PQ value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> raised to the 4th power, per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximatePQToGamma2Medium(FfxFloat32x4 a)
+{
+    FfxFloat32x4 power = a * a;  // a^2
+    power = power * a;           // a^3
+    return power * a;            // a^4
+}
+
+/// Approximate PQ-to-linear conversion for a four-component value.
+///
+/// PQ is modelled as x^(1/8), so the conversion back to linear is the 8th power. The power is
+/// evaluated as a chain of seven multiplies by <c><i>a</i></c>.
+///
+/// @param a                    The PQ value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> raised to the 8th power, per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximatePQToLinear(FfxFloat32x4 a)
+{
+    FfxFloat32x4 power = a * a;  // a^2
+    power = power * a;           // a^3
+    power = power * a;           // a^4
+    power = power * a;           // a^5
+    power = power * a;           // a^6
+    power = power * a;           // a^7
+    return power * a;            // a^8
+}
+
+/// Fast approximate Gamma2-to-PQ conversion for a four-component value.
+///
+/// PQ is modelled as x^(1/8), so Gamma2 to PQ is a 4th root. The root is approximated purely with
+/// integer operations on the float bit pattern: shift right by two, then add a bias constant.
+///
+/// @param a                    The Gamma2 value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ (approximate 4th root), per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximateGamma2ToPQ(FfxFloat32x4 a)
+{
+    FfxUInt32x4 bits = ffxAsUInt32(a) >> ffxBroadcast4(2u);
+    bits = bits + ffxBroadcast4(0x2F9A4E46u);
+    return ffxAsFloat(bits);
+}
+
+/// Medium-accuracy approximate Gamma2-to-PQ conversion for a four-component value.
+///
+/// PQ is modelled as x^(1/8), so Gamma2 to PQ is a 4th root. The integer bit-pattern estimate
+/// (shift right by two plus a bias constant) is refined with one Newton-Raphson step for
+/// f(b) = b^4 - a, written as b - b * (b^4 - a) / (4 * b^4).
+///
+/// @param a                    The Gamma2 value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ (refined approximate 4th root), per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximateGamma2ToPQMedium(FfxFloat32x4 a)
+{
+    FfxFloat32x4 b  = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(2u)) + ffxBroadcast4(0x2F9A4E46u));
+    // The refinement needs b^4 (four factors), matching the FfxFloat32x3 overload; using b^8 here
+    // would push the estimate away from the 4th root instead of towards it.
+    FfxFloat32x4 b4 = b * b * b * b;
+    return b - b * (b4 - a) / (FfxFloat32(4.0) * b4);
+}
+
+/// High-accuracy Gamma2-to-PQ conversion for a four-component value.
+///
+/// PQ is modelled as x^(1/8), so Gamma2 to PQ is a 4th root, computed here as two nested
+/// square roots.
+///
+/// @param a                    The Gamma2 value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ, per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximateGamma2ToPQHigh(FfxFloat32x4 a)
+{
+    FfxFloat32x4 squareRoot = ffxSqrt(a);
+    return ffxSqrt(squareRoot);
+}
+
+/// Fast approximate linear-to-PQ conversion for a four-component value.
+///
+/// PQ is modelled as x^(1/8), so linear to PQ is an 8th root. The root is approximated purely with
+/// integer operations on the float bit pattern: shift right by three, then add a bias constant.
+///
+/// @param a                    The linear value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ (approximate 8th root), per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximateLinearToPQ(FfxFloat32x4 a)
+{
+    FfxUInt32x4 bits = ffxAsUInt32(a) >> ffxBroadcast4(3u);
+    bits = bits + ffxBroadcast4(0x378D8723u);
+    return ffxAsFloat(bits);
+}
+
+/// Medium-accuracy approximate linear-to-PQ conversion for a four-component value.
+///
+/// Starts from the same bit-pattern estimate of the 8th root as
+/// <c><i>ffxApproximateLinearToPQ</i></c> and refines it with one correction step of the form
+/// b - b * (b^8 - a) / (8 * b^8).
+///
+/// @param a                    The linear value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ (refined approximate 8th root), per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximateLinearToPQMedium(FfxFloat32x4 a)
+{
+    // Initial estimate from the integer bit trick.
+    FfxFloat32x4 estimate = ffxAsFloat((ffxAsUInt32(a) >> ffxBroadcast4(3u)) + ffxBroadcast4(0x378D8723u));
+    FfxFloat32x4 estimatePow8 = estimate * estimate * estimate * estimate * estimate * estimate * estimate * estimate;
+
+    // One refinement step towards the 8th root of a.
+    FfxFloat32x4 correction = estimate * (estimatePow8 - a) / (FfxFloat32(8.0) * estimatePow8);
+    return estimate - correction;
+}
+
+/// High-accuracy linear-to-PQ conversion for a four-component value.
+///
+/// PQ is modelled as x^(1/8), so linear to PQ is an 8th root, computed here as three nested
+/// square roots.
+///
+/// @param a                    The linear value to convert.
+///
+/// @returns
+/// The value <c><i>a</i></c> converted into PQ, per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxApproximateLinearToPQHigh(FfxFloat32x4 a)
+{
+    FfxFloat32x4 root = ffxSqrt(a);  // a^(1/2)
+    root = ffxSqrt(root);            // a^(1/4)
+    return ffxSqrt(root);            // a^(1/8)
+}
+
+// Parabolic approximation of sine for a scalar.
+//
+// The input range {-1 to 1} stands for one full period {0 to 2 pi}; the result lies in
+// {-1/4 to 1/4}, standing for {-1 to 1}. Evaluates value * |value| - value.
+//
+// @param [in] value            The value to calculate approximate sine for.
+//
+// @returns
+// The approximate (scaled) sine of <c><i>value</i></c>.
+FfxFloat32 ffxParabolicSin(FfxFloat32 value)
+{
+    FfxFloat32 magnitude = abs(value);
+    return value * magnitude - value;
+}
+
+// Parabolic approximation of sine for two components at once.
+//
+// The input range {-1 to 1} stands for one full period {0 to 2 pi}; the result lies in
+// {-1/4 to 1/4}, standing for {-1 to 1}. Evaluates x * |x| - x per component.
+//
+// @param [in] x                The values to calculate approximate sine for.
+//
+// @returns
+// The approximate (scaled) sine of each component of <c><i>x</i></c>.
+FfxFloat32x2 ffxParabolicSin(FfxFloat32x2 x)
+{
+    FfxFloat32x2 magnitude = abs(x);
+    return x * magnitude - x;
+}
+
+// Parabolic approximation of cosine for a scalar.
+//
+// The input range {-1 to 1} stands for one full period {0 to 2 pi}; the result lies in
+// {-1/4 to 1/4}, standing for {-1 to 1}. The input is phase-shifted and wrapped back into
+// {-1 to 1}, then passed to ffxParabolicSin.
+//
+// @param [in] x                The value to calculate approximate cosine for.
+//
+// @returns
+// The approximate (scaled) cosine of <c><i>x</i></c>.
+FfxFloat32 ffxParabolicCos(FfxFloat32 x)
+{
+    // Shift the phase and wrap into [0, 1) ...
+    FfxFloat32 wrapped = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75));
+    // ... then expand back to the [-1, 1) input range expected by the sine approximation.
+    FfxFloat32 shifted = wrapped * FfxFloat32(2.0) - FfxFloat32(1.0);
+    return ffxParabolicSin(shifted);
+}
+
+// Parabolic approximation of cosine for two components at once.
+//
+// The input range {-1 to 1} stands for one full period {0 to 2 pi}; the result lies in
+// {-1/4 to 1/4}, standing for {-1 to 1}. Each component is phase-shifted and wrapped back into
+// {-1 to 1}, then passed to ffxParabolicSin.
+//
+// @param [in] x                The values to calculate approximate cosine for.
+//
+// @returns
+// The approximate (scaled) cosine of each component of <c><i>x</i></c>.
+FfxFloat32x2 ffxParabolicCos(FfxFloat32x2 x)
+{
+    // Shift the phase and wrap into [0, 1) ...
+    FfxFloat32x2 wrapped = ffxFract(x * ffxBroadcast2(0.5f) + ffxBroadcast2(0.75f));
+    // ... then expand back to the [-1, 1) input range expected by the sine approximation.
+    FfxFloat32x2 shifted = wrapped * ffxBroadcast2(2.0f) - ffxBroadcast2(1.0f);
+    return ffxParabolicSin(shifted);
+}
+
+// Parabolic approximation of sine and cosine in a single call.
+//
+// The input range {-1 to 1} stands for one full period {0 to 2 pi}; each result lies in
+// {-1/4 to 1/4}, standing for {-1 to 1}. The cosine input is derived with the same phase shift
+// as ffxParabolicCos, and both values go through the two-component ffxParabolicSin together.
+//
+// @param [in] x                The value to calculate approximate sine and cosine for.
+//
+// @returns
+// A <c><i>FfxFloat32x2</i></c> holding the approximate sine of <c><i>x</i></c> in the first
+// component and the approximate cosine in the second.
+FfxFloat32x2 ffxParabolicSinCos(FfxFloat32 x)
+{
+    FfxFloat32 cosInput = ffxFract(x * FfxFloat32(0.5) + FfxFloat32(0.75));
+    cosInput = cosInput * FfxFloat32(2.0) - FfxFloat32(1.0);
+
+    FfxFloat32x2 sinCosInputs = FfxFloat32x2(x, cosInput);
+    return ffxParabolicSin(sinCosInputs);
+}
+
+/// Branch-free logical AND of two unsigned values, implemented as their minimum.
+///
+/// For inputs that are each 0 or 1 the minimum equals the logical AND.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPU
+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x, FfxUInt32 y)
+{
+    FfxUInt32 smaller = min(x, y);
+    return smaller;
+}
+
+/// Branch-free logical AND of two 2-component unsigned values, implemented as their minimum.
+///
+/// For components that are each 0 or 1 the minimum equals the logical AND.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation, per component.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    FfxUInt32x2 smaller = min(x, y);
+    return smaller;
+}
+
+/// Branch-free logical AND of two 3-component unsigned values, implemented as their minimum.
+///
+/// For components that are each 0 or 1 the minimum equals the logical AND.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation, per component.
+///
+/// @ingroup GPU
+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    FfxUInt32x3 smaller = min(x, y);
+    return smaller;
+}
+
+/// Branch-free logical AND of two 4-component unsigned values, implemented as their minimum.
+///
+/// For components that are each 0 or 1 the minimum equals the logical AND.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation, per component.
+///
+/// @ingroup GPU
+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    FfxUInt32x4 smaller = min(x, y);
+    return smaller;
+}
+
+/// Branch-free logical NOT of a single unsigned value, implemented by flipping the lowest bit.
+///
+/// Maps 0 to 1 and 1 to 0. Note that this single-argument overload shares the name
+/// <c><i>ffxZeroOneAnd</i></c> with the two-argument AND, although it performs a NOT.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPU
+FfxUInt32 ffxZeroOneAnd(FfxUInt32 x)
+{
+    FfxUInt32 lowBit = FfxUInt32(1);
+    return x ^ lowBit;
+}
+
+/// Branch-free logical NOT of a 2-component unsigned value, implemented by flipping the lowest
+/// bit of each component.
+///
+/// Maps 0 to 1 and 1 to 0. Note that this single-argument overload shares the name
+/// <c><i>ffxZeroOneAnd</i></c> with the two-argument AND, although it performs a NOT.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, per component.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxZeroOneAnd(FfxUInt32x2 x)
+{
+    FfxUInt32x2 lowBits = ffxBroadcast2(1u);
+    return x ^ lowBits;
+}
+
+/// Branch-free logical NOT of a 3-component unsigned value, implemented by flipping the lowest
+/// bit of each component.
+///
+/// Maps 0 to 1 and 1 to 0. Note that this single-argument overload shares the name
+/// <c><i>ffxZeroOneAnd</i></c> with the two-argument AND, although it performs a NOT.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, per component.
+///
+/// @ingroup GPU
+FfxUInt32x3 ffxZeroOneAnd(FfxUInt32x3 x)
+{
+    FfxUInt32x3 lowBits = ffxBroadcast3(1u);
+    return x ^ lowBits;
+}
+
+/// Branch-free logical NOT of a 4-component unsigned value, implemented by flipping the lowest
+/// bit of each component.
+///
+/// Maps 0 to 1 and 1 to 0. Note that this single-argument overload shares the name
+/// <c><i>ffxZeroOneAnd</i></c> with the two-argument AND, although it performs a NOT.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, per component.
+///
+/// @ingroup GPU
+FfxUInt32x4 ffxZeroOneAnd(FfxUInt32x4 x)
+{
+    FfxUInt32x4 lowBits = ffxBroadcast4(1u);
+    return x ^ lowBits;
+}
+
+/// Branch-free logical OR of two unsigned values, implemented as their maximum.
+///
+/// For inputs that are each 0 or 1 the maximum equals the logical OR.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPU
+FfxUInt32 ffxZeroOneOr(FfxUInt32 x, FfxUInt32 y)
+{
+    FfxUInt32 larger = max(x, y);
+    return larger;
+}
+
+/// Branch-free logical OR of two 2-component unsigned values, implemented as their maximum.
+///
+/// For components that are each 0 or 1 the maximum equals the logical OR.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, per component.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxZeroOneOr(FfxUInt32x2 x, FfxUInt32x2 y)
+{
+    FfxUInt32x2 larger = max(x, y);
+    return larger;
+}
+
+/// Branch-free logical OR of two 3-component unsigned values, implemented as their maximum.
+///
+/// For components that are each 0 or 1 the maximum equals the logical OR.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, per component.
+///
+/// @ingroup GPU
+FfxUInt32x3 ffxZeroOneOr(FfxUInt32x3 x, FfxUInt32x3 y)
+{
+    FfxUInt32x3 larger = max(x, y);
+    return larger;
+}
+
+/// Branch-free logical OR of two 4-component unsigned values, implemented as their maximum.
+///
+/// For components that are each 0 or 1 the maximum equals the logical OR.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, per component.
+///
+/// @ingroup GPU
+FfxUInt32x4 ffxZeroOneOr(FfxUInt32x4 x, FfxUInt32x4 y)
+{
+    FfxUInt32x4 larger = max(x, y);
+    return larger;
+}
+
+/// Branch-free logical NOT of a floating-point value, returned as an unsigned integer.
+///
+/// Computes 1.0 - x and converts the result to <c><i>FfxUInt32</i></c>, so 0.0 maps to 1 and
+/// 1.0 maps to 0.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as an unsigned integer.
+///
+/// @ingroup GPU
+FfxUInt32 ffxZeroOneAndToU1(FfxFloat32 x)
+{
+    FfxFloat32 inverted = FfxFloat32(1.0) - x;
+    return FfxUInt32(inverted);
+}
+
+/// Branch-free logical NOT of a 2-component floating-point value, returned as unsigned integers.
+///
+/// Computes 1.0 - x per component and converts the result to <c><i>FfxUInt32x2</i></c>, so 0.0
+/// maps to 1 and 1.0 maps to 0.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as unsigned integers, per component.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxZeroOneAndToU2(FfxFloat32x2 x)
+{
+    FfxFloat32x2 inverted = ffxBroadcast2(1.0) - x;
+    return FfxUInt32x2(inverted);
+}
+
+/// Branch-free logical NOT of a 3-component floating-point value, returned as unsigned integers.
+///
+/// Computes 1.0 - x per component and converts the result to <c><i>FfxUInt32x3</i></c>, so 0.0
+/// maps to 1 and 1.0 maps to 0.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as unsigned integers, per component.
+///
+/// @ingroup GPU
+FfxUInt32x3 ffxZeroOneAndToU3(FfxFloat32x3 x)
+{
+    FfxFloat32x3 inverted = ffxBroadcast3(1.0) - x;
+    return FfxUInt32x3(inverted);
+}
+
+/// Branch-free logical NOT of a 4-component floating-point value, returned as unsigned integers.
+///
+/// Computes 1.0 - x per component and converts the result to <c><i>FfxUInt32x4</i></c>, so 0.0
+/// maps to 1 and 1.0 maps to 0.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation as unsigned integers, per component.
+///
+/// @ingroup GPU
+FfxUInt32x4 ffxZeroOneAndToU4(FfxFloat32x4 x)
+{
+    FfxFloat32x4 inverted = ffxBroadcast4(1.0) - x;
+    return FfxUInt32x4(inverted);
+}
+
+/// Branch-free logical (x AND y) OR z on floating-point values.
+///
+/// The AND is a multiply, the OR is an add, and the sum is passed through
+/// <c><i>ffxSaturate</i></c>.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value OR-ed with the result of the AND.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPU
+FfxFloat32 ffxZeroOneAndOr(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    FfxFloat32 combined = x * y + z;
+    return ffxSaturate(combined);
+}
+
+/// Branch-free logical (x AND y) OR z on 2-component floating-point values.
+///
+/// The AND is a multiply, the OR is an add, and the sum is passed through
+/// <c><i>ffxSaturate</i></c>.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value OR-ed with the result of the AND.
+///
+/// @returns
+/// Result of the AND OR operation, per component.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxZeroOneAndOr(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    FfxFloat32x2 combined = x * y + z;
+    return ffxSaturate(combined);
+}
+
+/// Branch-free logical (x AND y) OR z on 3-component floating-point values.
+///
+/// The AND is a multiply, the OR is an add, and the sum is passed through
+/// <c><i>ffxSaturate</i></c>.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value OR-ed with the result of the AND.
+///
+/// @returns
+/// Result of the AND OR operation, per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxZeroOneAndOr(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    FfxFloat32x3 combined = x * y + z;
+    return ffxSaturate(combined);
+}
+
+/// Branch-free logical (x AND y) OR z on 4-component floating-point values.
+///
+/// The AND is a multiply, the OR is an add, and the sum is passed through
+/// <c><i>ffxSaturate</i></c>.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value OR-ed with the result of the AND.
+///
+/// @returns
+/// Result of the AND OR operation, per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxZeroOneAndOr(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    FfxFloat32x4 combined = x * y + z;
+    return ffxSaturate(combined);
+}
+
+/// Branch-free "greater than zero" test: returns 1.0 if greater than zero and 0.0 if not.
+///
+/// The input is multiplied by positive infinity and the product is passed through
+/// <c><i>ffxSaturate</i></c>.
+/// NOTE(review): for x == 0 the product is 0 * infinity, so the result relies on how
+/// <c><i>ffxSaturate</i></c> treats that value - confirm on the target platform.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPU
+FfxFloat32 ffxZeroOneIsGreaterThanZero(FfxFloat32 x)
+{
+    FfxFloat32 scaled = x * FfxFloat32(FFX_POSITIVE_INFINITY_FLOAT);
+    return ffxSaturate(scaled);
+}
+
+/// Branch-free "greater than zero" test on two components: 1.0 if greater than zero, 0.0 if not.
+///
+/// Each component is multiplied by positive infinity and the product is passed through
+/// <c><i>ffxSaturate</i></c>.
+/// NOTE(review): for a zero component the product is 0 * infinity, so the result relies on how
+/// <c><i>ffxSaturate</i></c> treats that value - confirm on the target platform.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison, per component.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxZeroOneIsGreaterThanZero(FfxFloat32x2 x)
+{
+    FfxFloat32x2 scaled = x * ffxBroadcast2(FFX_POSITIVE_INFINITY_FLOAT);
+    return ffxSaturate(scaled);
+}
+
+/// Branch-free "greater than zero" test on three components: 1.0 if greater than zero, 0.0 if not.
+///
+/// Each component is multiplied by positive infinity and the product is passed through
+/// <c><i>ffxSaturate</i></c>.
+/// NOTE(review): for a zero component the product is 0 * infinity, so the result relies on how
+/// <c><i>ffxSaturate</i></c> treats that value - confirm on the target platform.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison, per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxZeroOneIsGreaterThanZero(FfxFloat32x3 x)
+{
+    FfxFloat32x3 scaled = x * ffxBroadcast3(FFX_POSITIVE_INFINITY_FLOAT);
+    return ffxSaturate(scaled);
+}
+
+/// Branch-free "greater than zero" test on four components: 1.0 if greater than zero, 0.0 if not.
+///
+/// Each component is multiplied by positive infinity and the product is passed through
+/// <c><i>ffxSaturate</i></c>.
+/// NOTE(review): for a zero component the product is 0 * infinity, so the result relies on how
+/// <c><i>ffxSaturate</i></c> treats that value - confirm on the target platform.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison, per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxZeroOneIsGreaterThanZero(FfxFloat32x4 x)
+{
+    FfxFloat32x4 scaled = x * ffxBroadcast4(FFX_POSITIVE_INFINITY_FLOAT);
+    return ffxSaturate(scaled);
+}
+
+/// Branch-free logical NOT of a floating-point value, computed as 1.0 - x.
+///
+/// Maps 0.0 to 1.0 and 1.0 to 0.0. Note that this single-argument overload carries the name
+/// <c><i>ffxZeroOneAnd</i></c> although it performs a NOT.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPU
+FfxFloat32 ffxZeroOneAnd(FfxFloat32 x)
+{
+    FfxFloat32 one = FfxFloat32(1.0);
+    return one - x;
+}
+
+/// Branch-free logical NOT of a 2-component floating-point value, computed as 1.0 - x.
+///
+/// Maps 0.0 to 1.0 and 1.0 to 0.0. Note that this single-argument overload carries the name
+/// <c><i>ffxZeroOneAnd</i></c> although it performs a NOT.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, per component.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxZeroOneAnd(FfxFloat32x2 x)
+{
+    FfxFloat32x2 ones = ffxBroadcast2(1.0);
+    return ones - x;
+}
+
+/// Branch-free logical NOT of a 3-component floating-point value, computed as 1.0 - x.
+///
+/// Maps 0.0 to 1.0 and 1.0 to 0.0. Note that this single-argument overload carries the name
+/// <c><i>ffxZeroOneAnd</i></c> although it performs a NOT.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxZeroOneAnd(FfxFloat32x3 x)
+{
+    FfxFloat32x3 ones = ffxBroadcast3(1.0);
+    return ones - x;
+}
+
+/// Branch-free logical NOT of a 4-component floating-point value, computed as 1.0 - x.
+///
+/// Maps 0.0 to 1.0 and 1.0 to 0.0. Note that this single-argument overload carries the name
+/// <c><i>ffxZeroOneAnd</i></c> although it performs a NOT.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxZeroOneAnd(FfxFloat32x4 x)
+{
+    FfxFloat32x4 ones = ffxBroadcast4(1.0);
+    return ones - x;
+}
+
+/// Branch-free logical OR of two floating-point values, implemented as their maximum.
+///
+/// For inputs that are each 0.0 or 1.0 the maximum equals the logical OR.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPU
+FfxFloat32 ffxZeroOneOr(FfxFloat32 x, FfxFloat32 y)
+{
+    FfxFloat32 larger = max(x, y);
+    return larger;
+}
+
+/// Branch-free logical OR of two 2-component floating-point values, implemented as their maximum.
+///
+/// For components that are each 0.0 or 1.0 the maximum equals the logical OR.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, per component.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxZeroOneOr(FfxFloat32x2 x, FfxFloat32x2 y)
+{
+    FfxFloat32x2 larger = max(x, y);
+    return larger;
+}
+
+/// Branch-free logical OR of two 3-component floating-point values, implemented as their maximum.
+///
+/// For components that are each 0.0 or 1.0 the maximum equals the logical OR.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxZeroOneOr(FfxFloat32x3 x, FfxFloat32x3 y)
+{
+    FfxFloat32x3 larger = max(x, y);
+    return larger;
+}
+
+/// Branch-free logical OR of two 4-component floating-point values, implemented as their maximum.
+///
+/// For components that are each 0.0 or 1.0 the maximum equals the logical OR.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation, per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxZeroOneOr(FfxFloat32x4 x, FfxFloat32x4 y)
+{
+    FfxFloat32x4 larger = max(x, y);
+    return larger;
+}
+
+/// Branch-free select between two floating-point values, driven by a zero/one selector.
+///
+/// Evaluates x * y + ((-x) * z + z), which yields <c><i>y</i></c> when the selector is 1.0 and
+/// <c><i>z</i></c> when it is 0.0.
+///
+/// @param [in] x           The selector value.
+/// @param [in] y           The value to return when the selector is 1.0.
+/// @param [in] z           The value to return when the selector is 0.0.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPU
+FfxFloat32 ffxZeroOneSelect(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    // Contribution of z: z when x is 0, nothing when x is 1.
+    FfxFloat32 fallbackTerm = (-x) * z + z;
+    return x * y + fallbackTerm;
+}
+
+/// Branch-free select between two 2-component values, driven by a zero/one selector.
+///
+/// Evaluates x * y + ((-x) * z + z) per component, which yields <c><i>y</i></c> where the
+/// selector is 1.0 and <c><i>z</i></c> where it is 0.0.
+///
+/// @param [in] x           The selector value.
+/// @param [in] y           The value to return where the selector is 1.0.
+/// @param [in] z           The value to return where the selector is 0.0.
+///
+/// @returns
+/// The selected value, per component.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxZeroOneSelect(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    // Contribution of z: z where x is 0, nothing where x is 1.
+    FfxFloat32x2 fallbackTerm = (-x) * z + z;
+    return x * y + fallbackTerm;
+}
+
+/// Branch-free select between two 3-component values, driven by a zero/one selector.
+///
+/// Evaluates x * y + ((-x) * z + z) per component, which yields <c><i>y</i></c> where the
+/// selector is 1.0 and <c><i>z</i></c> where it is 0.0.
+///
+/// @param [in] x           The selector value.
+/// @param [in] y           The value to return where the selector is 1.0.
+/// @param [in] z           The value to return where the selector is 0.0.
+///
+/// @returns
+/// The selected value, per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxZeroOneSelect(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    // Contribution of z: z where x is 0, nothing where x is 1.
+    FfxFloat32x3 fallbackTerm = (-x) * z + z;
+    return x * y + fallbackTerm;
+}
+
+/// Branch-free select between two 4-component values, driven by a zero/one selector.
+///
+/// Evaluates x * y + ((-x) * z + z) per component, which yields <c><i>y</i></c> where the
+/// selector is 1.0 and <c><i>z</i></c> where it is 0.0.
+///
+/// @param [in] x           The selector value.
+/// @param [in] y           The value to return where the selector is 1.0.
+/// @param [in] z           The value to return where the selector is 0.0.
+///
+/// @returns
+/// The selected value, per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxZeroOneSelect(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    // Contribution of z: z where x is 0, nothing where x is 1.
+    FfxFloat32x4 fallbackTerm = (-x) * z + z;
+    return x * y + fallbackTerm;
+}
+
+/// Branch-free sign test: returns 1.0 if less than zero and 0.0 if not.
+///
+/// The input is multiplied by negative infinity and the product is passed through
+/// <c><i>ffxSaturate</i></c>.
+/// NOTE(review): for x == 0 the product is 0 * infinity, so the result relies on how
+/// <c><i>ffxSaturate</i></c> treats that value - confirm on the target platform.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the sign test.
+///
+/// @ingroup GPU
+FfxFloat32 ffxZeroOneIsSigned(FfxFloat32 x)
+{
+    FfxFloat32 scaled = x * FfxFloat32(FFX_NEGATIVE_INFINITY_FLOAT);
+    return ffxSaturate(scaled);
+}
+
+/// Branch-free sign test on two components: 1.0 if less than zero and 0.0 if not.
+///
+/// Each component is multiplied by negative infinity and the product is passed through
+/// <c><i>ffxSaturate</i></c>.
+/// NOTE(review): for a zero component the product is 0 * infinity, so the result relies on how
+/// <c><i>ffxSaturate</i></c> treats that value - confirm on the target platform.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the sign test, per component.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxZeroOneIsSigned(FfxFloat32x2 x)
+{
+    FfxFloat32x2 scaled = x * ffxBroadcast2(FFX_NEGATIVE_INFINITY_FLOAT);
+    return ffxSaturate(scaled);
+}
+
+/// Branch-free sign test on three components: 1.0 if less than zero and 0.0 if not.
+///
+/// Each component is multiplied by negative infinity and the product is passed through
+/// <c><i>ffxSaturate</i></c>.
+/// NOTE(review): for a zero component the product is 0 * infinity, so the result relies on how
+/// <c><i>ffxSaturate</i></c> treats that value - confirm on the target platform.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the sign test, per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxZeroOneIsSigned(FfxFloat32x3 x)
+{
+    FfxFloat32x3 scaled = x * ffxBroadcast3(FFX_NEGATIVE_INFINITY_FLOAT);
+    return ffxSaturate(scaled);
+}
+
+/// Branch-free sign test on four components: 1.0 if less than zero and 0.0 if not.
+///
+/// Each component is multiplied by negative infinity and the product is passed through
+/// <c><i>ffxSaturate</i></c>.
+/// NOTE(review): for a zero component the product is 0 * infinity, so the result relies on how
+/// <c><i>ffxSaturate</i></c> treats that value - confirm on the target platform.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the sign test, per component.
+///
+/// @ingroup GPU
+FfxFloat32x4 ffxZeroOneIsSigned(FfxFloat32x4 x)
+{
+    FfxFloat32x4 scaled = x * ffxBroadcast4(FFX_NEGATIVE_INFINITY_FLOAT);
+    return ffxSaturate(scaled);
+}
+
+/// Encode a linear value with the Rec.709 transfer curve.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// The result combines a linear segment (slope 4.5) and a power segment (exponent 0.45, scale 1.099,
+/// offset -0.099) with a single clamp() whose first argument is the segment threshold.
+/// NOTE(review): the two segments are passed as the clamp bounds and can cross (the power segment is
+/// below the linear one at color == 0) - confirm clamp() behaves as intended on the target platform.
+///
+/// @param [in] color           The linear color to convert to Rec. 709.
+///
+/// @returns
+/// The <c><i>color</i></c> encoded in Rec.709 space.
+///
+/// @ingroup GPU
+FfxFloat32 ffxRec709FromLinear(FfxFloat32 color)
+{
+    // x: threshold in encoded space, y: linear slope, z: power exponent.
+    FfxFloat32x3 segment = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
+    // x: power-segment scale, y: power-segment offset.
+    FfxFloat32x2 curve = FfxFloat32x2(1.099, -0.099);
+
+    FfxFloat32 linearPart = color * segment.y;
+    FfxFloat32 powerPart  = pow(color, segment.z) * curve.x + curve.y;
+    return clamp(segment.x, linearPart, powerPart);
+}
+
+/// Encode a 2-component linear value with the Rec.709 transfer curve.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// The result combines a linear segment (slope 4.5) and a power segment (exponent 0.45, scale 1.099,
+/// offset -0.099) with a single clamp() whose first argument is the segment threshold.
+/// NOTE(review): the two segments are passed as the clamp bounds and can cross (the power segment is
+/// below the linear one at color == 0) - confirm clamp() behaves as intended on the target platform.
+///
+/// @param [in] color           The linear color to convert to Rec. 709.
+///
+/// @returns
+/// The <c><i>color</i></c> encoded in Rec.709 space, per component.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxRec709FromLinear(FfxFloat32x2 color)
+{
+    // x: threshold in encoded space, y: linear slope, z: power exponent.
+    FfxFloat32x3 segment = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
+    // x: power-segment scale, y: power-segment offset.
+    FfxFloat32x2 curve = FfxFloat32x2(1.099, -0.099);
+
+    FfxFloat32x2 linearPart = color * segment.yy;
+    FfxFloat32x2 powerPart  = pow(color, segment.zz) * curve.xx + curve.yy;
+    return clamp(segment.xx, linearPart, powerPart);
+}
+
+/// Encode a 3-component linear value with the Rec.709 transfer curve.
+///
+/// Rec.709 is used for some HDTVs.
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// The result combines a linear segment (slope 4.5) and a power segment (exponent 0.45, scale 1.099,
+/// offset -0.099) with a single clamp() whose first argument is the segment threshold.
+/// NOTE(review): the two segments are passed as the clamp bounds and can cross (the power segment is
+/// below the linear one at color == 0) - confirm clamp() behaves as intended on the target platform.
+///
+/// @param [in] color           The linear color to convert to Rec. 709.
+///
+/// @returns
+/// The <c><i>color</i></c> encoded in Rec.709 space, per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxRec709FromLinear(FfxFloat32x3 color)
+{
+    // x: threshold in encoded space, y: linear slope, z: power exponent.
+    FfxFloat32x3 segment = FfxFloat32x3(0.018 * 4.5, 4.5, 0.45);
+    // x: power-segment scale, y: power-segment offset.
+    FfxFloat32x2 curve = FfxFloat32x2(1.099, -0.099);
+
+    FfxFloat32x3 linearPart = color * segment.yyy;
+    FfxFloat32x3 powerPart  = pow(color, segment.zzz) * curve.xxx + curve.yyy;
+    return clamp(segment.xxx, linearPart, powerPart);
+}
+
+/// Encode a linear value with a pure power (gamma) curve.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGamma</i></c>.
+///
+/// @param [in] color           The value to convert to gamma space from linear.
+/// @param [in] rcpX            The reciprocal of the power value used for the gamma curve.
+///
+/// @returns
+/// <c><i>color</i></c> raised to the power <c><i>rcpX</i></c>, i.e. the value in gamma space.
+///
+/// @ingroup GPU
+FfxFloat32 ffxGammaFromLinear(FfxFloat32 color, FfxFloat32 rcpX)
+{
+    FfxFloat32 exponent = FfxFloat32(rcpX);
+    return pow(color, exponent);
+}
+
+/// Encode a 2-component linear value with a pure power (gamma) curve.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGamma</i></c>.
+///
+/// @param [in] color           The value to convert to gamma space from linear.
+/// @param [in] rcpX            The reciprocal of the power value used for the gamma curve,
+///                             applied to every component.
+///
+/// @returns
+/// <c><i>color</i></c> raised to the power <c><i>rcpX</i></c>, per component.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxGammaFromLinear(FfxFloat32x2 color, FfxFloat32 rcpX)
+{
+    FfxFloat32x2 exponent = ffxBroadcast2(rcpX);
+    return pow(color, exponent);
+}
+
+/// Encode a 3-component linear value with a pure power (gamma) curve.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGamma</i></c>.
+///
+/// @param [in] color           The value to convert to gamma space from linear.
+/// @param [in] rcpX            The reciprocal of the power value used for the gamma curve,
+///                             applied to every component.
+///
+/// @returns
+/// <c><i>color</i></c> raised to the power <c><i>rcpX</i></c>, per component.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxGammaFromLinear(FfxFloat32x3 color, FfxFloat32 rcpX)
+{
+    FfxFloat32x3 exponent = ffxBroadcast3(rcpX);
+    return pow(color, exponent);
+}
+
+/// Compute a PQ value from a linear value.
+///
+/// Evaluates ((0.835938 + 18.8516 * p) / (1.0 + 18.6875 * p)) ^ 78.8438 with p = x ^ 0.159302.
+/// NOTE(review): the function name reads as PQ-to-linear, but the formula and the summary above
+/// describe the linear-to-PQ direction - confirm against callers before relying on the name.
+///
+/// @param [in] x               The linear value to convert to PQ.
+///
+/// @returns
+/// The result of the formula above (a PQ-encoded value, per the summary).
+///
+/// @ingroup GPU
+FfxFloat32 ffxPQToLinear(FfxFloat32 x)
+{
+    FfxFloat32 p = pow(x, FfxFloat32(0.159302));
+
+    FfxFloat32 numerator   = FfxFloat32(0.835938) + FfxFloat32(18.8516) * p;
+    FfxFloat32 denominator = FfxFloat32(1.0) + FfxFloat32(18.6875) * p;
+    return pow(numerator / denominator, FfxFloat32(78.8438));
+}
+
+/// Compute a PQ value from a linear value.
+///
+/// @param [in] value           The value to convert to PQ from linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxPQToLinear(FfxFloat32x2 x)
+{
+    FfxFloat32x2 p = pow(x, ffxBroadcast2(0.159302));
+    return pow((ffxBroadcast2(0.835938) + ffxBroadcast2(18.8516) * p) / (ffxBroadcast2(1.0) + ffxBroadcast2(18.6875) * p), ffxBroadcast2(78.8438));
+}
+
+/// Compute a PQ value from a linear value.
+///
+/// @param [in] value           The value to convert to PQ from linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxPQToLinear(FfxFloat32x3 x)
+{
+    FfxFloat32x3 p = pow(x, ffxBroadcast3(0.159302));
+    return pow((ffxBroadcast3(0.835938) + ffxBroadcast3(18.8516) * p) / (ffxBroadcast3(1.0) + ffxBroadcast3(18.6875) * p), ffxBroadcast3(78.8438));
+}
+
+/// Compute a SRGB value from a linear value (note: despite the 'ToLinear' suffix, this encodes linear to SRGB).
+///
+/// @param [in] color           The linear value to convert to SRGB.
+///
+/// @returns
+/// A value in SRGB space. NOTE(review): for small inputs clamp() receives minVal > maxVal; confirm the target's clamp() result in that case.
+///
+/// @ingroup GPU
+FfxFloat32 ffxSrgbToLinear(FfxFloat32 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+    return clamp(j.x, color * j.y, pow(color, j.z) * k.x + k.y);
+}
+
+/// Compute SRGB values from linear values (note: despite the 'ToLinear' suffix, this encodes linear to SRGB).
+///
+/// @param [in] color           The linear values to convert to SRGB.
+///
+/// @returns
+/// A value in SRGB space. NOTE(review): for small inputs clamp() receives minVal > maxVal; confirm the target's clamp() result in that case.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxSrgbToLinear(FfxFloat32x2 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+    return clamp(j.xx, color * j.yy, pow(color, j.zz) * k.xx + k.yy);
+}
+
+/// Compute SRGB values from linear values (note: despite the 'ToLinear' suffix, this encodes linear to SRGB).
+///
+/// @param [in] color           The linear values to convert to SRGB.
+///
+/// @returns
+/// A value in SRGB space. NOTE(review): for small inputs clamp() receives minVal > maxVal; confirm the target's clamp() result in that case.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxSrgbToLinear(FfxFloat32x3 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.055, -0.055);
+    return clamp(j.xxx, color * j.yyy, pow(color, j.zzz) * k.xxx + k.yyy);
+}
+
+/// Compute a linear value from a REC.709 value.
+///
+/// @param [in] color           The REC.709 value to convert to linear.
+///
+/// @returns
+/// A value in linear space. NOTE(review): the segment threshold 0.081 / 4.5 is compared against the encoded input; confirm against the REC.709 definition.
+///
+/// @ingroup GPU
+FfxFloat32 ffxLinearFromRec709(FfxFloat32 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z));
+}
+
+/// Compute linear values from REC.709 values (2-component overload).
+///
+/// @param [in] color           The REC.709 values to convert to linear.
+///
+/// @returns
+/// A value in linear space. NOTE(review): the segment threshold 0.081 / 4.5 is compared against the encoded input; confirm against the REC.709 definition.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxLinearFromRec709(FfxFloat32x2 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz));
+}
+
+/// Compute linear values from REC.709 values (3-component overload).
+///
+/// @param [in] color           The REC.709 values to convert to linear.
+///
+/// @returns
+/// A value in linear space. NOTE(review): the segment threshold 0.081 / 4.5 is compared against the encoded input; confirm against the REC.709 definition.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxLinearFromRec709(FfxFloat32x3 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.099, 0.099 / 1.099);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz));
+}
+
+/// Compute a linear value from a value in a gamma space.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color           The gamma-space value to convert to linear.
+/// @param [in] power           The power value used for the gamma curve.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat32 ffxLinearFromGamma(FfxFloat32 color, FfxFloat32 power)
+{
+    return pow(color, FfxFloat32(power));
+}
+
+/// Compute linear values from values in a gamma space (2-component overload).
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color           The gamma-space values to convert to linear.
+/// @param [in] power           The power value used for the gamma curve, applied to every component.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxLinearFromGamma(FfxFloat32x2 color, FfxFloat32 power)
+{
+    return pow(color, ffxBroadcast2(power));
+}
+
+/// Compute linear values from values in a gamma space (3-component overload).
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] color           The gamma-space values to convert to linear.
+/// @param [in] power           The power value used for the gamma curve, applied to every component.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxLinearFromGamma(FfxFloat32x3 color, FfxFloat32 power)
+{
+    return pow(color, ffxBroadcast3(power));
+}
+
+/// Compute a linear value from a value in a PQ space.
+///
+/// This is the (approximate) inverse of <c><i>ffxPQToLinear</i></c>, which encodes linear values to PQ.
+///
+/// @param [in] x               The PQ-space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat32 ffxLinearFromPQ(FfxFloat32 x)
+{
+    FfxFloat32 p = pow(x, FfxFloat32(0.0126833));
+    return pow(ffxSaturate(p - FfxFloat32(0.835938)) / (FfxFloat32(18.8516) - FfxFloat32(18.6875) * p), FfxFloat32(6.27739));
+}
+
+/// Compute linear values from values in a PQ space (2-component overload).
+///
+/// This is the (approximate) inverse of <c><i>ffxPQToLinear</i></c>, which encodes linear values to PQ.
+///
+/// @param [in] x               The PQ-space values to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxLinearFromPQ(FfxFloat32x2 x)
+{
+    FfxFloat32x2 p = pow(x, ffxBroadcast2(0.0126833));
+    return pow(ffxSaturate(p - ffxBroadcast2(0.835938)) / (ffxBroadcast2(18.8516) - ffxBroadcast2(18.6875) * p), ffxBroadcast2(6.27739));
+}
+
+/// Compute linear values from values in a PQ space (3-component overload).
+///
+/// This is the (approximate) inverse of <c><i>ffxPQToLinear</i></c>, which encodes linear values to PQ.
+///
+/// @param [in] x               The PQ-space values to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxLinearFromPQ(FfxFloat32x3 x)
+{
+    FfxFloat32x3 p = pow(x, ffxBroadcast3(0.0126833));
+    return pow(ffxSaturate(p - ffxBroadcast3(0.835938)) / (ffxBroadcast3(18.8516) - ffxBroadcast3(18.6875) * p), ffxBroadcast3(6.27739));
+}
+
+/// Compute a linear value from a value in a SRGB space.
+///
+/// NOTE(review): the segment threshold is 0.04045 / 12.92 yet it is compared against the SRGB-encoded input; confirm against the sRGB definition.
+///
+/// @param [in] color           The SRGB-space value to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat32 ffxLinearFromSrgb(FfxFloat32 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.x), color * j.y, pow(color * k.x + k.y, j.z));
+}
+
+/// Compute linear values from values in a SRGB space (2-component overload).
+///
+/// NOTE(review): the segment threshold is 0.04045 / 12.92 yet it is compared against the SRGB-encoded input; confirm against the sRGB definition.
+///
+/// @param [in] color           The SRGB-space values to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat32x2 ffxLinearFromSrgb(FfxFloat32x2 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xx), color * j.yy, pow(color * k.xx + k.yy, j.zz));
+}
+
+/// Compute linear values from values in a SRGB space (3-component overload).
+///
+/// NOTE(review): the segment threshold is 0.04045 / 12.92 yet it is compared against the SRGB-encoded input; confirm against the sRGB definition.
+///
+/// @param [in] color           The SRGB-space values to convert to linear.
+///
+/// @returns
+/// A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat32x3 ffxLinearFromSrgb(FfxFloat32x3 color)
+{
+    FfxFloat32x3 j = FfxFloat32x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);
+    FfxFloat32x2 k = FfxFloat32x2(1.0 / 1.055, 0.055 / 1.055);
+    return ffxZeroOneSelect(ffxZeroOneIsSigned(color - j.xxx), color * j.yyy, pow(color * k.xxx + k.yyy, j.zzz));
+}
+
+/// A remapping of 64x1 to 8x8 imposing rotated 2x2 pixel quads in quad linear.
+///
+///  543210   <- bit index within 'a'
+///  ======
+///  ..xxx.   <- bits of 'a' that form the x coordinate
+///  yy...y   <- bits of 'a' that form the y coordinate
+///
+/// @param [in] a       The input 1D coordinate to remap.
+///
+/// @returns
+/// The remapped 2D coordinates.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxRemapForQuad(FfxUInt32 a)
+{
+    return FfxUInt32x2(bitfieldExtract(a, 1u, 3u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u));
+}
+
+/// A helper function performing the 64x1 to 8x8 remapping which is necessary for 2D wave reductions.
+///
+/// The 64-wide lane indices (shown in hexadecimal) are remapped to 8x8 as follows:
+///
+///     00 01 08 09 10 11 18 19
+///     02 03 0a 0b 12 13 1a 1b
+///     04 05 0c 0d 14 15 1c 1d
+///     06 07 0e 0f 16 17 1e 1f
+///     20 21 28 29 30 31 38 39
+///     22 23 2a 2b 32 33 3a 3b
+///     24 25 2c 2d 34 35 3c 3d
+///     26 27 2e 2f 36 37 3e 3f
+///
+/// @param [in] a       The input 1D coordinate to remap.
+///
+/// @returns
+/// The remapped 2D coordinates.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxRemapForWaveReduction(FfxUInt32 a)
+{
+    return FfxUInt32x2(bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u), bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u));
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h b/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h
new file mode 100644
index 0000000000..c46ccb3657
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_gpu_common_half.h
@@ -0,0 +1,2978 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#if FFX_HALF
+#if FFX_HLSL_6_2
+/// A define value for 16bit positive infinity (bit pattern 0x7c00, cast to uint16_t on this path).
+///
+/// @ingroup GPU
+#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0x7c00u)
+
+/// A define value for 16bit negative infinity (bit pattern 0xfc00, cast to uint16_t on this path).
+///
+/// @ingroup GPU
+#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16((uint16_t)0xfc00u)
+#else
+/// A define value for 16bit positive infinity (bit pattern 0x7c00).
+///
+/// @ingroup GPU
+#define FFX_POSITIVE_INFINITY_HALF FFX_TO_FLOAT16(0x7c00u)
+
+/// A define value for 16bit negative infinity (bit pattern 0xfc00).
+///
+/// @ingroup GPU
+#define FFX_NEGATIVE_INFINITY_HALF FFX_TO_FLOAT16(0xfc00u)
+#endif // FFX_HLSL_6_2
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxFloat16 ffxMin(FfxFloat16 x, FfxFloat16 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxMin(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxMin(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxMin(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxInt16 ffxMin(FfxInt16 x, FfxInt16 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxInt16x2 ffxMin(FfxInt16x2 x, FfxInt16x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxInt16x3 ffxMin(FfxInt16x3 x, FfxInt16x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxInt16x4 ffxMin(FfxInt16x4 x, FfxInt16x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxUInt16 ffxMin(FfxUInt16 x, FfxUInt16 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxUInt16x2 ffxMin(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxUInt16x3 ffxMin(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    return min(x, y);
+}
+
+/// Compute the min of two values.
+///
+/// @param [in] x                   The first value to compute the min of.
+/// @param [in] y                   The second value to compute the min of.
+///
+/// @returns
+/// The lowest of the two values.
+///
+/// @ingroup GPU
+FfxUInt16x4 ffxMin(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return min(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxFloat16 ffxMax(FfxFloat16 x, FfxFloat16 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxMax(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxMax(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxMax(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxInt16 ffxMax(FfxInt16 x, FfxInt16 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxInt16x2 ffxMax(FfxInt16x2 x, FfxInt16x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxInt16x3 ffxMax(FfxInt16x3 x, FfxInt16x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxInt16x4 ffxMax(FfxInt16x4 x, FfxInt16x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxUInt16 ffxMax(FfxUInt16 x, FfxUInt16 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxUInt16x2 ffxMax(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxUInt16x3 ffxMax(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    return max(x, y);
+}
+
+/// Compute the max of two values.
+///
+/// @param [in] x                   The first value to compute the max of.
+/// @param [in] y                   The second value to compute the max of.
+///
+/// @returns
+/// The highest of the two values.
+///
+/// @ingroup GPU
+FfxUInt16x4 ffxMax(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return max(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second.
+///
+/// @param [in] x                   The value to raise to the power y.
+/// @param [in] y                   The power to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second.
+///
+/// @ingroup GPU
+FfxFloat16 ffxPow(FfxFloat16 x, FfxFloat16 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second (2-component overload).
+///
+/// @param [in] x                   The values to raise to the power y.
+/// @param [in] y                   The powers to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPow(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second (3-component overload).
+///
+/// @param [in] x                   The values to raise to the power y.
+/// @param [in] y                   The powers to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxPow(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the value of the first parameter raised to the power of the second (4-component overload).
+///
+/// @param [in] x                   The values to raise to the power y.
+/// @param [in] y                   The powers to which to raise x.
+///
+/// @returns
+/// The value of the first parameter raised to the power of the second.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxPow(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return pow(x, y);
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16 ffxSqrt(FfxFloat16 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxSqrt(FfxFloat16x2 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxSqrt(FfxFloat16x3 x)
+{
+    return sqrt(x);
+}
+
+/// Compute the square root of a value.
+///
+/// @param [in] x                   The value to compute the square root of.
+///
+/// @returns
+/// The square root of <c><i>x</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxSqrt(FfxFloat16x4 x)
+{
+    return sqrt(x);
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into; expected to be positive because the sign bit is OR'ed in.
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16 ffxCopySignBitHalf(FfxFloat16 d, FfxFloat16 s)
+{
+    return FFX_TO_FLOAT16(FFX_TO_UINT16(d) | (FFX_TO_UINT16(s) & FFX_BROADCAST_UINT16(0x8000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into; expected to be positive because the sign bit is OR'ed in.
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxCopySignBitHalf(FfxFloat16x2 d, FfxFloat16x2 s)
+{
+    return FFX_TO_FLOAT16X2(FFX_TO_UINT16X2(d) | (FFX_TO_UINT16X2(s) & FFX_BROADCAST_UINT16X2(0x8000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into; expected to be positive because the sign bit is OR'ed in.
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxCopySignBitHalf(FfxFloat16x3 d, FfxFloat16x3 s)
+{
+    return FFX_TO_FLOAT16X3(FFX_TO_UINT16X3(d) | (FFX_TO_UINT16X3(s) & FFX_BROADCAST_UINT16X3(0x8000u)));
+}
+
+/// Copy the sign bit from 's' to positive 'd'.
+///
+/// @param [in] d                   The value to copy the sign bit into; expected to be positive because the sign bit is OR'ed in.
+/// @param [in] s                   The value to copy the sign bit from.
+///
+/// @returns
+/// The value of <c><i>d</i></c> with the sign bit from <c><i>s</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxCopySignBitHalf(FfxFloat16x4 d, FfxFloat16x4 s)
+{
+    return FFX_TO_FLOAT16X4(FFX_TO_UINT16X4(d) | (FFX_TO_UINT16X4(s) & FFX_BROADCAST_UINT16X4(0x8000u)));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPU
+FfxFloat16 ffxIsSignedHalf(FfxFloat16 m)
+{
+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The values to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxIsSignedHalf(FfxFloat16x2 m)
+{
+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The values to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxIsSignedHalf(FfxFloat16x3 m)
+{
+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0
+///     m >= 0  := 0
+///     m < 0   := 1
+///
+/// Uses the following useful floating point logic,
+///     saturate(+a*(-INF)==-INF) := 0
+///     saturate( 0*(-INF)== NaN) := 0
+///     saturate(-a*(-INF)==+INF) := 1
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The values to test against 0.
+///
+/// @returns
+/// 1.0 when the value is negative, or 0.0 when the value is 0 or positive.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxIsSignedHalf(FfxFloat16x4 m)
+{
+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0   (assuming saturate(NaN) yields 0, as documented for ffxIsSignedHalf)
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The value to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPU
+FfxFloat16 ffxIsGreaterThanZeroHalf(FfxFloat16 m)
+{
+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0   (assuming saturate(NaN) yields 0, as documented for ffxIsSignedHalf)
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The values to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxIsGreaterThanZeroHalf(FfxFloat16x2 m)
+{
+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0   (assuming saturate(NaN) yields 0, as documented for ffxIsSignedHalf)
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The values to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxIsGreaterThanZeroHalf(FfxFloat16x3 m)
+{
+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF));
+}
+
+/// A single operation to return the following:
+///     m = NaN := 0   (assuming saturate(NaN) yields 0, as documented for ffxIsSignedHalf)
+///     m > 0   := 1
+///     m <= 0  := 0
+///
+/// This function is useful when creating masks for branch-free logic.
+///
+/// @param [in] m                       The values to test against zero.
+///
+/// @returns
+/// 1.0 when the value is positive, or 0.0 when the value is 0 or negative.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxIsGreaterThanZeroHalf(FfxFloat16x4 m)
+{
+    return ffxSaturate(m * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF));
+}
+
+/// Convert the bit pattern of a 16bit floating point value to a sortable integer.
+///
+///  - If sign bit=0, flip the sign bit (positives).
+///  - If sign bit=1, flip all bits     (negatives).
+///
+/// The resulting mapping has the properties that:
+///  - Larger integers are more positive values.
+///  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+///
+/// @param [in] x                       The bit pattern of the floating point value to make sortable.
+///
+/// @returns
+/// The sortable integer value.
+///
+/// @ingroup GPU
+FfxUInt16 ffxFloatToSortableIntegerHalf(FfxUInt16 x)
+{
+    return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000));
+}
+
+/// Convert a sortable integer back to the bit pattern of a 16bit floating point value.
+///
+/// The conversion works as follows:
+///  - If sign bit=1, flip the sign bit (positives).
+///  - If sign bit=0, flip all bits     (negatives).
+///
+/// @param [in] x                       The sortable integer value to make floating point.
+///
+/// @returns
+/// The bit pattern of the floating point value.
+///
+/// @ingroup GPU
+FfxUInt16 ffxSortableIntegerToFloatHalf(FfxUInt16 x)
+{
+    return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16(15))) | FFX_BROADCAST_UINT16(0x8000));
+}
+
+/// Convert a pair of 16bit floating point values to a pair of sortable integers.
+/// 
+///  - If sign bit=0, flip the sign bit (positives).
+///  - If sign bit=1, flip all bits     (negatives).
+/// 
+/// The function has the side effects that:
+///  - Larger integers are more positive values.
+///  - Float zero is mapped to center of integers (so clear to integer zero is a nice default for atomic max usage).
+/// 
+/// @param [in] x                       The floating point values to make sortable.
+/// 
+/// @returns
+/// The sortable integer values.
+/// 
+/// @ingroup GPU
+FfxUInt16x2 ffxFloatToSortableIntegerHalf(FfxUInt16x2 x)
+{
+    return x ^ ((ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); // Per component: XOR mask = (x shifted right by 15) | 0x8000; presumably the shift is arithmetic -- TODO confirm.
+}
+
+/// Convert a pair of sortable integers to a pair of 16bit floating point values.
+///
+/// The conversion is applied to each component as follows:
+///  - If sign bit=1, flip the sign bit (positives).
+///  - If sign bit=0, flip all bits     (negatives).
+///
+/// @param [in] x                       The sortable integer values to make floating point.
+///
+/// @returns
+/// The floating point values.
+///
+/// @ingroup GPU
+FfxUInt16x2 ffxSortableIntegerToFloatHalf(FfxUInt16x2 x)
+{
+    return x ^ ((~ffxBitShiftRightHalf(x, FFX_BROADCAST_UINT16X2(15))) | FFX_BROADCAST_UINT16X2(0x8000)); // Per component: XOR mask = ~(x shifted right by 15) | 0x8000; presumably the shift is arithmetic -- TODO confirm.
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y0 [Zero] X0
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesZeroY0ZeroX0(FfxUInt32x2 i)
+{
+    return (i.x & 0x000000ffu) | ((i.y & 0x000000ffu) << 16); // byte 0 = i.x byte 0, byte 2 = i.y byte 0; bytes 1 and 3 are zero
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y1 [Zero] X1
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesZeroY1ZeroX1(FfxUInt32x2 i)
+{
+    return ((i.x & 0x0000ff00u) >> 8) | ((i.y & 0x0000ff00u) << 8); // byte 0 = i.x byte 1, byte 2 = i.y byte 1; bytes 1 and 3 are zero
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y2 [Zero] X2
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesZeroY2ZeroX2(FfxUInt32x2 i)
+{
+    return ((i.x & 0x00ff0000u) >> 16) | (i.y & 0x00ff0000u); // byte 0 = i.x byte 2, byte 2 = i.y byte 2; bytes 1 and 3 are zero
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// [Zero] Y3 [Zero] X3
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesZeroY3ZeroX3(FfxUInt32x2 i)
+{
+    return ((i.x & 0xff000000u) >> 24) | ((i.y & 0xff000000u) >> 8); // byte 0 = i.x byte 3, byte 2 = i.y byte 3; bytes 1 and 3 are zero
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 Y1 X0
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesY3Y2Y1X0(FfxUInt32x2 i)
+{
+    return (i.y & 0xffffff00u) | (i.x & 0x000000ffu); // keep bytes 1-3 of i.y, take byte 0 from i.x byte 0
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 Y1 X2
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesY3Y2Y1X2(FfxUInt32x2 i)
+{
+    return (i.y & 0xffffff00u) | ((i.x & 0x00ff0000u) >> 16); // keep bytes 1-3 of i.y, take byte 0 from i.x byte 2
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 X0 Y0
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesY3Y2X0Y0(FfxUInt32x2 i)
+{
+    return (i.y & 0xffff00ffu) | ((i.x & 0x000000ffu) << 8); // keep bytes 0, 2 and 3 of i.y, take byte 1 from i.x byte 0
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 Y2 X2 Y0
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesY3Y2X2Y0(FfxUInt32x2 i)
+{
+    return (i.y & 0xffff00ffu) | ((i.x & 0x00ff0000u) >> 8); // keep bytes 0, 2 and 3 of i.y, take byte 1 from i.x byte 2
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 X0 Y1 Y0
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesY3X0Y1Y0(FfxUInt32x2 i)
+{
+    return (i.y & 0xff00ffffu) | ((i.x & 0x000000ffu) << 16); // keep bytes 0, 1 and 3 of i.y, take byte 2 from i.x byte 0
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y3 X2 Y1 Y0
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesY3X2Y1Y0(FfxUInt32x2 i)
+{
+    return (i.y & 0xff00ffffu) | (i.x & 0x00ff0000u); // keep bytes 0, 1 and 3 of i.y, take byte 2 from i.x byte 2
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// X0 Y2 Y1 Y0
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesX0Y2Y1Y0(FfxUInt32x2 i)
+{
+    return (i.y & 0x00ffffffu) | ((i.x & 0x000000ffu) << 24); // keep bytes 0-2 of i.y, take byte 3 from i.x byte 0
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// X2 Y2 Y1 Y0
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesX2Y2Y1Y0(FfxUInt32x2 i)
+{
+    return (i.y & 0x00ffffffu) | ((i.x & 0x00ff0000u) << 8); // keep bytes 0-2 of i.y, take byte 3 from i.x byte 2
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y2 X2 Y0 X0
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesY2X2Y0X0(FfxUInt32x2 i)
+{
+    return (i.x & 0x00ff00ffu) | ((i.y & 0x00ff00ffu) << 8); // bytes 0 and 2 of i.x stay in place; bytes 0 and 2 of i.y move up to bytes 1 and 3
+}
+
+/// Packs the bytes from the X and Y components of a FfxUInt32x2 into a single 32-bit integer.
+///
+/// The resulting integer will contain bytes in the following order, from most to least significant:
+/// Y2 Y0 X2 X0
+///
+/// @param [in] i                       The integer pair to pack.
+///
+/// @returns
+/// The packed integer value.
+///
+/// @ingroup GPU
+FfxUInt32 ffxPackBytesY2Y0X2X0(FfxUInt32x2 i)
+{
+    return (i.x & 0x000000ffu) | ((i.x & 0x00ff0000u) >> 8) | ((i.y & 0x000000ffu) << 16) | ((i.y & 0x00ff0000u) << 8); // low half = {i.x byte 2, i.x byte 0}, high half = {i.y byte 2, i.y byte 0}
+}
+
+/// Takes two Float16x2 values x and y, normalizes them and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}.
+///
+/// @param [in] x                       The first float16x2 value to pack.
+/// @param [in] y                       The second float16x2 value to pack.
+///
+/// @returns
+/// The packed FfxUInt16x2 value.
+///
+/// @ingroup GPU
+FfxUInt16x2 ffxPackX0Y0X1Y1UnsignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    x *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0); // scale both components of x by 1/32768
+    y *= FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0); // scale both components of y by 1/32768
+    return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y))))); // convert each pair to a 32-bit word (conversion macros defined elsewhere), then interleave bytes 0 and 2 of both words as Y2 X2 Y0 X0
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7],   
+/// d.y[16:23] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// r=ffxPermuteUByte0Float16x2ToUint2(d,i)
+///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+///   Where 'k1' is an SGPR with 0x????
+///   Where 'k2' is an SGPR with 0x????
+///   V_PK_FMA_F16 i,i,k0.x,0
+///   V_PERM_B32 r.x,i,i,k1
+///   V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteUByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); // scale i by 1/32768, then convert the half pair into one 32-bit word (conversion macros defined elsewhere)
+    return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); // byte 0 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 1-3 come from b
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15],   
+/// d.y[16:23] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// r=ffxPermuteUByte1Float16x2ToUint2(d,i)
+///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+///   Where 'k1' is an SGPR with 0x????
+///   Where 'k2' is an SGPR with 0x????
+///   V_PK_FMA_F16 i,i,k0.x,0
+///   V_PERM_B32 r.x,i,i,k1
+///   V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteUByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); // scale i by 1/32768, then convert the half pair into one 32-bit word (conversion macros defined elsewhere)
+    return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); // byte 1 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 0, 2 and 3 come from b
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23],   
+/// d.y[16:23] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops.
+///
+/// r=ffxPermuteUByte2Float16x2ToUint2(d,i)
+///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+///   Where 'k1' is an SGPR with 0x????
+///   Where 'k2' is an SGPR with 0x????
+///   V_PK_FMA_F16 i,i,k0.x,0
+///   V_PERM_B32 r.x,i,i,k1
+///   V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteUByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); // scale i by 1/32768, then convert the half pair into one 32-bit word (conversion macros defined elsewhere)
+    return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); // byte 2 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 0, 1 and 3 come from b
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31],   
+/// d.y[16:23] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops.
+///
+/// r=ffxPermuteUByte3Float16x2ToUint2(d,i)
+///   Where 'k0' is an SGPR with {1.0/32768.0} packed into the lower 16-bits
+///   Where 'k1' is an SGPR with 0x????
+///   Where 'k2' is an SGPR with 0x????
+///   V_PK_FMA_F16 i,i,k0.x,0
+///   V_PERM_B32 r.x,i,i,k1
+///   V_PERM_B32 r.y,i,i,k2
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteUByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0))); // scale i by 1/32768, then convert the half pair into one 32-bit word (conversion macros defined elsewhere)
+    return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); // byte 3 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 0-2 come from b
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops.  
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteUByte0Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); // gather byte 0 of i.x / i.y into bytes 0 / 2, convert to a half pair (presumably a bit-cast -- TODO confirm), then scale by 32768
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops.  
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteUByte1Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); // gather byte 1 of i.x / i.y into bytes 0 / 2, convert to a half pair (presumably a bit-cast -- TODO confirm), then scale by 32768
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops.  
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteUByte2Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); // gather byte 2 of i.x / i.y into bytes 0 / 2, convert to a half pair (presumably a bit-cast -- TODO confirm), then scale by 32768
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops.  
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteUByte3Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0); // gather byte 3 of i.x / i.y into bytes 0 / 2, convert to a half pair (presumably a bit-cast -- TODO confirm), then scale by 32768
+}
+
+/// Takes two signed Float16x2 values x and y, normalizes them (adding a 0.25/32768 bias) and builds a single Uint16x2 value in the format {{x0,y0},{x1,y1}}.
+///
+/// @param [in] x                       The first float16x2 value to pack.
+/// @param [in] y                       The second float16x2 value to pack.
+///
+/// @returns
+/// The packed FfxUInt16x2 value.
+///
+/// @ingroup GPU
+FfxUInt16x2 ffxPackX0Y0X1Y1SignedToUint16x2(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    x = x * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0); // scale x by 1/32768 and add the 0.25/32768 bias
+    y = y * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0); // scale y by 1/32768 and add the 0.25/32768 bias
+    return FFX_UINT32_TO_UINT16X2(ffxPackBytesY2X2Y0X0(FfxUInt32x2(FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(x)), FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(y))))); // convert each pair to a 32-bit word (conversion macros defined elsewhere), then interleave bytes 0 and 2 of both words as Y2 X2 Y0 X0
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7],   
+/// d.y[16:23] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); // scale i by 1/32768 and add the 0.25/32768 bias, then convert the half pair into one 32-bit word (conversion macros defined elsewhere)
+    return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); // byte 0 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 1-3 come from b
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15],   
+/// d.y[16:23] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); // scale i by 1/32768 and add the 0.25/32768 bias, then convert the half pair into one 32-bit word (conversion macros defined elsewhere)
+    return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); // byte 1 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 0, 2 and 3 come from b
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23],   
+/// d.y[16:23] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); // scale i by 1/32768 and add the 0.25/32768 bias, then convert the half pair into one 32-bit word (conversion macros defined elsewhere)
+    return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); // byte 2 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 0, 1 and 3 come from b
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31],   
+/// d.y[16:23] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))); // scale i by 1/32768 and add the 0.25/32768 bias, then convert the half pair into one 32-bit word (conversion macros defined elsewhere)
+    return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); // byte 3 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 0-2 come from b
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[0:7],   
+/// d.y[16:23] into r.y[0:7], i.x[8:15] into r.x[8:15], r.y[8:15] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteZeroBasedSByte0Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; // scale, bias and convert the half pair into one 32-bit word, then flip bit 7 of bytes 0 and 2
+    return FfxUInt32x2(ffxPackBytesY3Y2Y1X0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2Y1X2(FfxUInt32x2(d.y, b))); // byte 0 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 1-3 come from b
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[8:15],   
+/// d.y[16:23] into r.y[8:15], i.x[0:7] into r.x[0:7], r.y[0:7] and i.y[0:15] into r.x[16:31], r.y[16:31] using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteZeroBasedSByte1Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; // scale, bias and convert the half pair into one 32-bit word, then flip bit 7 of bytes 0 and 2
+    return FfxUInt32x2(ffxPackBytesY3Y2X0Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3Y2X2Y0(FfxUInt32x2(d.y, b))); // byte 1 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 0, 2 and 3 come from b
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[16:23],   
+/// d.y[16:23] into r.y[16:23], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[8:15] into r.x[24:31], r.y[24:31] using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteZeroBasedSByte2Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; // scale, bias and convert the half pair into one 32-bit word, then flip bit 7 of bytes 0 and 2
+    return FfxUInt32x2(ffxPackBytesY3X0Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesY3X2Y1Y0(FfxUInt32x2(d.y, b))); // byte 2 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 0, 1 and 3 come from b
+}
+
+/// Given a FfxUInt32x2 value d, Float16x2 value i and a resulting FfxUInt32x2 value r, this function packs d.x[0:7] into r.x[24:31],   
+/// d.y[16:23] into r.y[24:31], i.x[0:15] into r.x[0:15], r.y[0:15] and i.y[0:7] into r.x[16:23], r.y[16:23] using 3 ops.
+///
+/// Zero-based flips the MSB bit of the byte (making 128 "exact zero" actually zero).
+/// This is useful if there is a desire for cleared values to decode as zero.
+///
+/// Handles signed byte values.
+///
+/// @param [in] d                       The FfxUInt32x2 value to be packed.
+/// @param [in] i                       The FfxFloat16x2 value to be packed. 
+///
+/// @returns
+/// The packed FfxUInt32x2 value.
+///
+/// @ingroup GPU
+FfxUInt32x2 ffxPermuteZeroBasedSByte3Float16x2ToUint2(FfxUInt32x2 d, FfxFloat16x2 i)
+{
+    FfxUInt32 b = FFX_UINT16X2_TO_UINT32(FFX_TO_UINT16X2(i * FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0) + FFX_BROADCAST_FLOAT16X2(0.25 / 32768.0))) ^ 0x00800080u; // scale, bias and convert the half pair into one 32-bit word, then flip bit 7 of bytes 0 and 2
+    return FfxUInt32x2(ffxPackBytesX0Y2Y1Y0(FfxUInt32x2(d.x, b)), ffxPackBytesX2Y2Y1Y0(FfxUInt32x2(d.y, b))); // byte 3 of r.x / r.y comes from d.x byte 0 / d.y byte 2; bytes 0-2 come from b
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops.  
+///
+/// Handles signed byte values.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); // gather byte 0 of i.x / i.y, convert to a half pair (presumably a bit-cast -- TODO confirm), scale by 32768 and subtract the 0.25 bias
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops.  
+///
+/// Handles signed byte values.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); // gather byte 1 of i.x / i.y, convert to a half pair (presumably a bit-cast -- TODO confirm), scale by 32768 and subtract the 0.25 bias
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops.
+///  
+/// Handles signed byte values.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); // gather byte 2 of i.x / i.y, convert to a half pair (presumably a bit-cast -- TODO confirm), scale by 32768 and subtract the 0.25 bias
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops.  
+///
+/// Handles signed byte values.
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i))) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); // gather byte 3 of i.x / i.y, convert to a half pair (presumably a bit-cast -- TODO confirm), scale by 32768 and subtract the 0.25 bias
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[0:7] into r.x[0:7] and i.y[0:7] into r.y[0:7] using 2 ops.
+///  
+/// Handles zero-based signed byte values (the MSB of each extracted byte is flipped before conversion).
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteZeroBasedSByte0Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY0ZeroX0(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); // gather byte 0 of i.x / i.y, flip bit 7 of each gathered byte, convert to a half pair, scale by 32768 and subtract 0.25
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[8:15] into r.x[0:7] and i.y[8:15] into r.y[0:7] using 2 ops.
+///  
+/// Handles zero-based signed byte values (the MSB of each extracted byte is flipped before conversion).
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteZeroBasedSByte1Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY1ZeroX1(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); // gather byte 1 of i.x / i.y, flip bit 7 of each gathered byte, convert to a half pair, scale by 32768 and subtract 0.25
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[16:23] into r.x[0:7] and i.y[16:23] into r.y[0:7] using 2 ops.
+///  
+/// Handles zero-based signed byte values (the MSB of each extracted byte is flipped before conversion).
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteZeroBasedSByte2Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY2ZeroX2(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); // gather byte 2 of i.x / i.y, flip bit 7 of each gathered byte, convert to a half pair, scale by 32768 and subtract 0.25
+}
+
+/// Given a FfxUInt32x2 value i and a resulting Float16x2 value r, this function packs i.x[24:31] into r.x[0:7] and i.y[24:31] into r.y[0:7] using 2 ops.
+///  
+/// Handles zero-based signed byte values (the MSB of each extracted byte is flipped before conversion).
+///
+/// @param [in] i                       The FfxUInt32x2 value to be unpacked. 
+///
+/// @returns
+/// The unpacked FfxFloat16x2.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxPermuteZeroBasedSByte3Uint2ToFloat16x2(FfxUInt32x2 i)
+{
+    return FFX_TO_FLOAT16X2(FFX_UINT32_TO_UINT16X2(ffxPackBytesZeroY3ZeroX3(i) ^ 0x00800080u)) * FFX_BROADCAST_FLOAT16X2(32768.0) - FFX_BROADCAST_FLOAT16X2(0.25); // gather byte 3 of i.x / i.y, flip bit 7 of each gathered byte, convert to a half pair, scale by 32768 and subtract 0.25
+}
+
+/// Calculate a half-precision low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat16 ffxApproximateSqrtHalf(FfxFloat16 a)
+{
+    return FFX_TO_FLOAT16((FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1)) + FFX_BROADCAST_UINT16(0x1de2)); // shift the 16-bit integer form of a right by one and add the constant 0x1de2 (FFX_TO_* presumably bit-casts -- TODO confirm)
+}
+
+/// Calculate a half-precision low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxApproximateSqrtHalf(FfxFloat16x2 a)
+{
+    return FFX_TO_FLOAT16X2((FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1)) + FFX_BROADCAST_UINT16X2(0x1de2)); // per component: shift the 16-bit integer form right by one and add 0x1de2 (FFX_TO_* presumably bit-casts -- TODO confirm)
+}
+
+/// Calculate a half-precision low-quality approximation for the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the square root for.
+///
+/// @returns
+/// An approximation of the square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxApproximateSqrtHalf(FfxFloat16x3 a)
+{
+    return FFX_TO_FLOAT16X3((FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1)) + FFX_BROADCAST_UINT16X3(0x1de2)); // per component: shift the 16-bit integer form right by one and add 0x1de2 (FFX_TO_* presumably bit-casts -- TODO confirm)
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat16 ffxApproximateReciprocalHalf(FfxFloat16 a)
+{
+    return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x7784) - FFX_TO_UINT16(a)); // subtract the 16-bit integer form of a from the constant 0x7784 (FFX_TO_* presumably bit-casts -- TODO confirm)
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxApproximateReciprocalHalf(FfxFloat16x2 a)
+{
+    return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x7784) - FFX_TO_UINT16X2(a)); // per component: subtract the 16-bit integer form of a from 0x7784 (FFX_TO_* presumably bit-casts -- TODO confirm)
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxApproximateReciprocalHalf(FfxFloat16x3 a)
+{
+    return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x7784) - FFX_TO_UINT16X3(a)); // 0x7784 minus FFX_TO_UINT16X3(a); NOTE(review): presumably the raw half bits -- confirm
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxApproximateReciprocalHalf(FfxFloat16x4 a)
+{
+    return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x7784) - FFX_TO_UINT16X4(a)); // 0x7784 minus FFX_TO_UINT16X4(a); NOTE(review): presumably the raw half bits -- confirm
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPU
+FfxFloat16 ffxApproximateReciprocalMediumHalf(FfxFloat16 a)
+{
+    FfxFloat16 b = FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x778d) - FFX_TO_UINT16(a)); // initial estimate: magic constant 0x778d minus FFX_TO_UINT16(a)
+    return b * (-b * a + FFX_BROADCAST_FLOAT16(2.0)); // refine the estimate once: b * (2 - a * b)
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxApproximateReciprocalMediumHalf(FfxFloat16x2 a)
+{
+    FfxFloat16x2 b = FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x778d) - FFX_TO_UINT16X2(a)); // initial estimate: magic constant 0x778d minus FFX_TO_UINT16X2(a)
+    return b * (-b * a + FFX_BROADCAST_FLOAT16X2(2.0)); // refine the estimate once: b * (2 - a * b)
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxApproximateReciprocalMediumHalf(FfxFloat16x3 a)
+{
+    FfxFloat16x3 b = FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x778d) - FFX_TO_UINT16X3(a)); // initial estimate: magic constant 0x778d minus FFX_TO_UINT16X3(a)
+    return b * (-b * a + FFX_BROADCAST_FLOAT16X3(2.0)); // refine the estimate once: b * (2 - a * b)
+}
+
+/// Calculate a half-precision medium-quality approximation for the reciprocal of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal for.
+///
+/// @returns
+/// An approximation of the reciprocal, estimated to medium quality.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxApproximateReciprocalMediumHalf(FfxFloat16x4 a)
+{
+    FfxFloat16x4 b = FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x778d) - FFX_TO_UINT16X4(a)); // initial estimate: magic constant 0x778d minus FFX_TO_UINT16X4(a)
+    return b * (-b * a + FFX_BROADCAST_FLOAT16X4(2.0)); // refine the estimate once: b * (2 - a * b)
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal of the square root for.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat16 ffxApproximateReciprocalSquareRootHalf(FfxFloat16 a)
+{
+    return FFX_TO_FLOAT16(FFX_BROADCAST_UINT16(0x59a3) - (FFX_TO_UINT16(a) >> FFX_BROADCAST_UINT16(1))); // 0x59a3 minus (FFX_TO_UINT16(a) >> 1); NOTE(review): presumably operates on the raw half bits -- confirm
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal of the square root for.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x2 a)
+{
+    return FFX_TO_FLOAT16X2(FFX_BROADCAST_UINT16X2(0x59a3) - (FFX_TO_UINT16X2(a) >> FFX_BROADCAST_UINT16X2(1))); // 0x59a3 minus (FFX_TO_UINT16X2(a) >> 1); NOTE(review): presumably operates on the raw half bits -- confirm
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal of the square root for.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x3 a)
+{
+    return FFX_TO_FLOAT16X3(FFX_BROADCAST_UINT16X3(0x59a3) - (FFX_TO_UINT16X3(a) >> FFX_BROADCAST_UINT16X3(1))); // 0x59a3 minus (FFX_TO_UINT16X3(a) >> 1); NOTE(review): presumably operates on the raw half bits -- confirm
+}
+
+/// Calculate a half-precision low-quality approximation for the reciprocal of the square root of a value.
+///
+/// For additional information on the approximation family of functions, you can refer to Michal Drobot's excellent
+/// presentation materials:
+///
+///  - https://michaldrobot.files.wordpress.com/2014/05/gcn_alu_opt_digitaldragons2014.pdf
+///  - https://github.com/michaldrobot/ShaderFastLibs/blob/master/ShaderFastMathLib.h
+///
+/// @param [in] a           The value to calculate an approximate to the reciprocal of the square root for.
+///
+/// @returns
+/// An approximation of the reciprocal of the square root, estimated to low quality.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxApproximateReciprocalSquareRootHalf(FfxFloat16x4 a)
+{
+    return FFX_TO_FLOAT16X4(FFX_BROADCAST_UINT16X4(0x59a3) - (FFX_TO_UINT16X4(a) >> FFX_BROADCAST_UINT16X4(1))); // 0x59a3 minus (FFX_TO_UINT16X4(a) >> 1); NOTE(review): presumably operates on the raw half bits -- confirm
+}
+
+/// An approximation of sine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x            The value to calculate approximate sine for.
+///
+/// @returns
+/// The approximate sine of <c><i>x</i></c>.
+FfxFloat16 ffxParabolicSinHalf(FfxFloat16 x)
+{
+    return x * abs(x) - x; // parabolic arcs x * |x| - x: zero at x = -1, 0 and 1
+}
+
+/// An approximation of sine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x            The values to calculate approximate sine for.
+///
+/// @returns
+/// The approximate sine of <c><i>x</i></c>.
+FfxFloat16x2 ffxParabolicSinHalf(FfxFloat16x2 x)
+{
+    return x * abs(x) - x; // parabolic arcs x * |x| - x: zero at x = -1, 0 and 1
+}
+
+/// An approximation of cosine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x            The value to calculate approximate cosine for.
+///
+/// @returns
+/// The approximate cosine of <c><i>x</i></c>.
+FfxFloat16 ffxParabolicCosHalf(FfxFloat16 x)
+{
+    x = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75)); // halve the phase, offset it by 0.75, then wrap with ffxFract
+    x = x * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0); // rescale: x * 2 - 1
+    return ffxParabolicSinHalf(x); // reuse the sine approximation on the remapped phase
+}
+
+/// An approximation of cosine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x            The values to calculate approximate cosine for.
+///
+/// @returns
+/// The approximate cosine of <c><i>x</i></c>.
+FfxFloat16x2 ffxParabolicCosHalf(FfxFloat16x2 x)
+{
+    x = ffxFract(x * FFX_BROADCAST_FLOAT16X2(0.5) + FFX_BROADCAST_FLOAT16X2(0.75)); // halve the phase, offset it by 0.75, then wrap with ffxFract
+    x = x * FFX_BROADCAST_FLOAT16X2(2.0) - FFX_BROADCAST_FLOAT16X2(1.0); // rescale: x * 2 - 1
+    return ffxParabolicSinHalf(x); // reuse the sine approximation on the remapped phase
+}
+
+/// An approximation of both sine and cosine.
+///
+/// Valid input range is {-1 to 1} representing {0 to 2 pi}, and the output range
+/// is {-1/4 to 1/4} representing {-1 to 1}.
+///
+/// @param [in] x            The value to calculate approximate sine and cosine for.
+///
+/// @returns
+/// A <c><i>FfxFloat16x2</i></c> containing approximations of both sine and cosine of <c><i>x</i></c>.
+FfxFloat16x2 ffxParabolicSinCosHalf(FfxFloat16 x)
+{
+    FfxFloat16 y = ffxFract(x * FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16(0.75)); // same phase remap as ffxParabolicCosHalf
+    y     = y * FFX_BROADCAST_FLOAT16(2.0) - FFX_BROADCAST_FLOAT16(1.0);
+    return ffxParabolicSinHalf(FfxFloat16x2(x, y)); // first component: sine of x; second component: sine of the remapped phase (the cosine)
+}
+
+/// Conditional free logic AND operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPU
+FfxUInt16 ffxZeroOneAndHalf(FfxUInt16 x, FfxUInt16 y)
+{
+    return min(x, y); // for operands that are 0 or 1, min() gives the logical AND
+}
+
+/// Conditional free logic AND operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPU
+FfxUInt16x2 ffxZeroOneAndHalf(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    return min(x, y); // for operands that are 0 or 1, min() gives the logical AND
+}
+
+/// Conditional free logic AND operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPU
+FfxUInt16x3 ffxZeroOneAndHalf(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    return min(x, y); // for operands that are 0 or 1, min() gives the logical AND
+}
+
+/// Conditional free logic AND operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPU
+FfxUInt16x4 ffxZeroOneAndHalf(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return min(x, y); // for operands that are 0 or 1, min() gives the logical AND
+}
+
+/// Conditional free logic NOT operation on a half-precision value.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///                         The result is <c>x ^ 1</c>, so an input of 0 yields 1 and an input of 1 yields 0.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPU
+FfxUInt16 ffxZeroOneNotHalf(FfxUInt16 x)
+{
+    return x ^ FFX_BROADCAST_UINT16(1);
+}
+
+/// Conditional free logic NOT operation on half-precision values.
+///
+/// @param [in] x           The values to be fed into the NOT operator.
+///                         The result is <c>x ^ 1</c>, with the 1 supplied by FFX_BROADCAST_UINT16X2.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPU
+FfxUInt16x2 ffxZeroOneNotHalf(FfxUInt16x2 x)
+{
+    return x ^ FFX_BROADCAST_UINT16X2(1);
+}
+
+/// Conditional free logic NOT operation on half-precision values.
+///
+/// @param [in] x           The values to be fed into the NOT operator.
+///                         The result is <c>x ^ 1</c>, with the 1 supplied by FFX_BROADCAST_UINT16X3.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPU
+FfxUInt16x3 ffxZeroOneNotHalf(FfxUInt16x3 x)
+{
+    return x ^ FFX_BROADCAST_UINT16X3(1);
+}
+
+/// Conditional free logic NOT operation on half-precision values.
+///
+/// @param [in] x           The values to be fed into the NOT operator.
+///                         The result is <c>x ^ 1</c>, with the 1 supplied by FFX_BROADCAST_UINT16X4.
+///
+/// @returns
+/// Result of the NOT operation.
+///
+/// @ingroup GPU
+FfxUInt16x4 ffxZeroOneNotHalf(FfxUInt16x4 x)
+{
+    return x ^ FFX_BROADCAST_UINT16X4(1);
+}
+
+/// Conditional free logic OR operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPU
+FfxUInt16 ffxZeroOneOrHalf(FfxUInt16 x, FfxUInt16 y)
+{
+    return max(x, y); // for operands that are 0 or 1, max() gives the logical OR
+}
+
+/// Conditional free logic OR operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPU
+FfxUInt16x2 ffxZeroOneOrHalf(FfxUInt16x2 x, FfxUInt16x2 y)
+{
+    return max(x, y); // for operands that are 0 or 1, max() gives the logical OR
+}
+
+/// Conditional free logic OR operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPU
+FfxUInt16x3 ffxZeroOneOrHalf(FfxUInt16x3 x, FfxUInt16x3 y)
+{
+    return max(x, y); // for operands that are 0 or 1, max() gives the logical OR
+}
+
+/// Conditional free logic OR operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPU
+FfxUInt16x4 ffxZeroOneOrHalf(FfxUInt16x4 x, FfxUInt16x4 y)
+{
+    return max(x, y); // for operands that are 0 or 1, max() gives the logical OR
+}
+
+/// Convert a half-precision floating-point value between 0.0 and 1.0 to an <c>FfxUInt16</c>.
+///
+/// @param [in] x           The value to be converted to an <c>FfxUInt16</c>.
+///
+/// @returns
+/// The converted <c>FfxUInt16</c> value.
+///
+/// @ingroup GPU
+FfxUInt16 ffxZeroOneFloat16ToUint16(FfxFloat16 x)
+{
+    return FFX_TO_UINT16(x * FFX_TO_FLOAT16(FFX_TO_UINT16(1)));
+}
+
+/// Convert half-precision floating-point values between 0.0 and 1.0 to an <c>FfxUInt16x2</c>.
+///
+/// @param [in] x           The values to be converted to an <c>FfxUInt16x2</c>.
+///
+/// @returns
+/// The converted <c>FfxUInt16x2</c> value.
+///
+/// @ingroup GPU
+FfxUInt16x2 ffxZeroOneFloat16x2ToUint16x2(FfxFloat16x2 x)
+{
+    return FFX_TO_UINT16X2(x * FFX_TO_FLOAT16X2(FfxUInt16x2(1, 1)));
+}
+
+/// Convert half-precision floating-point values between 0.0 and 1.0 to an <c>FfxUInt16x3</c>.
+///
+/// @param [in] x           The values to be converted to an <c>FfxUInt16x3</c>.
+///
+/// @returns
+/// The converted <c>FfxUInt16x3</c> value.
+///
+/// @ingroup GPU
+FfxUInt16x3 ffxZeroOneFloat16x3ToUint16x3(FfxFloat16x3 x)
+{
+    return FFX_TO_UINT16X3(x * FFX_TO_FLOAT16X3(FfxUInt16x3(1, 1, 1)));
+}
+
+/// Convert half-precision floating-point values between 0.0 and 1.0 to an <c>FfxUInt16x4</c>.
+///
+/// @param [in] x           The values to be converted to an <c>FfxUInt16x4</c>.
+///
+/// @returns
+/// The converted <c>FfxUInt16x4</c> value.
+///
+/// @ingroup GPU
+FfxUInt16x4 ffxZeroOneFloat16x4ToUint16x4(FfxFloat16x4 x)
+{
+    return FFX_TO_UINT16X4(x * FFX_TO_FLOAT16X4(FfxUInt16x4(1, 1, 1, 1)));
+}
+
+/// Convert an <c>FfxUInt16</c> value between 0 and 1 to a half-precision floating-point value.
+///
+/// @param [in] x           The value to be converted to a half-precision floating-point value.
+///
+/// @returns
+/// The converted half-precision floating-point value.
+///
+/// @ingroup GPU
+FfxFloat16 ffxZeroOneUint16ToFloat16(FfxUInt16 x)
+{
+    return FFX_TO_FLOAT16(x * FFX_TO_UINT16(FFX_TO_FLOAT16(1.0)));
+}
+
+/// Convert an <c>FfxUInt16x2</c> value between 0 and 1 to half-precision floating-point values.
+///
+/// @param [in] x           The values to be converted to half-precision floating-point values.
+///
+/// @returns
+/// The converted half-precision floating-point values.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxZeroOneUint16x2ToFloat16x2(FfxUInt16x2 x)
+{
+    return FFX_TO_FLOAT16X2(x * FFX_TO_UINT16X2(FfxUInt16x2(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); // NOTE(review): unlike the scalar overload, the 1.0 values pass through an FfxUInt16x2 constructor before FFX_TO_UINT16X2 -- confirm this yields the same scale factor
+}
+
+/// Convert an <c>FfxUInt16x3</c> value between 0 and 1 to half-precision floating-point values.
+///
+/// @param [in] x           The values to be converted to half-precision floating-point values.
+///
+/// @returns
+/// The converted half-precision floating-point values.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxZeroOneUint16x3ToFloat16x3(FfxUInt16x3 x)
+{
+    return FFX_TO_FLOAT16X3(x * FFX_TO_UINT16X3(FfxUInt16x3(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); // NOTE(review): unlike the scalar overload, the 1.0 values pass through an FfxUInt16x3 constructor before FFX_TO_UINT16X3 -- confirm this yields the same scale factor
+}
+
+/// Convert an <c>FfxUInt16x4</c> value between 0 and 1 to half-precision floating-point values.
+///
+/// @param [in] x           The values to be converted to half-precision floating-point values.
+///
+/// @returns
+/// The converted half-precision floating-point values.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxZeroOneUint16x4ToFloat16x4(FfxUInt16x4 x)
+{
+    return FFX_TO_FLOAT16X4(x * FFX_TO_UINT16X4(FfxUInt16x4(FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0), FFX_TO_FLOAT16(1.0)))); // NOTE(review): unlike the scalar overload, the 1.0 values pass through an FfxUInt16x4 constructor before FFX_TO_UINT16X4 -- confirm this yields the same scale factor
+}
+
+/// Conditional free logic AND operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPU
+FfxFloat16 ffxZeroOneAndHalf(FfxFloat16 x, FfxFloat16 y)
+{
+    return min(x, y); // for operands that are 0.0 or 1.0, min() gives the logical AND
+}
+
+/// Conditional free logic AND operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxZeroOneAndHalf(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return min(x, y); // for operands that are 0.0 or 1.0, min() gives the logical AND
+}
+
+/// Conditional free logic AND operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxZeroOneAndHalf(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return min(x, y); // for operands that are 0.0 or 1.0, min() gives the logical AND
+}
+
+/// Conditional free logic AND operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+///
+/// @returns
+/// Result of the AND operation.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxZeroOneAndHalf(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return min(x, y); // for operands that are 0.0 or 1.0, min() gives the logical AND
+}
+
+/// Conditional free logic AND NOT operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND NOT operator.
+/// @param [in] y           The second value to be fed into the AND NOT operator.
+///
+/// @returns
+/// Result of the AND NOT operation, evaluated as <c>1 - x * y</c>.
+///
+/// @ingroup GPU
+FfxFloat16 ffxSignedZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y)
+{
+    return (-x) * y + FFX_BROADCAST_FLOAT16(1.0);
+}
+
+/// Conditional free logic AND NOT operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND NOT operator.
+/// @param [in] y           The second value to be fed into the AND NOT operator.
+///
+/// @returns
+/// Result of the AND NOT operation, evaluated as <c>1 - x * y</c>.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxSignedZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return (-x) * y + FFX_BROADCAST_FLOAT16X2(1.0);
+}
+
+/// Conditional free logic AND NOT operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND NOT operator.
+/// @param [in] y           The second value to be fed into the AND NOT operator.
+///
+/// @returns
+/// Result of the AND NOT operation, evaluated as <c>1 - x * y</c>.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxSignedZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return (-x) * y + FFX_BROADCAST_FLOAT16X3(1.0);
+}
+
+/// Conditional free logic AND NOT operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the AND NOT operator.
+/// @param [in] y           The second value to be fed into the AND NOT operator.
+///
+/// @returns
+/// Result of the AND NOT operation, evaluated as <c>1 - x * y</c>.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxSignedZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return (-x) * y + FFX_BROADCAST_FLOAT16X4(1.0);
+}
+
+/// Conditional free logic AND operation using two half-precision values followed by
+/// an OR operation using the resulting value and a third half-precision value.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR-ed with the result of the AND operation.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPU
+FfxFloat16 ffxZeroOneAndOrHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
+{
+    return ffxSaturate(x * y + z); // the product acts as AND; adding z and passing through ffxSaturate acts as OR
+}
+
+/// Conditional free logic AND operation using two half-precision values followed by
+/// an OR operation using the resulting value and a third half-precision value.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR-ed with the result of the AND operation.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxZeroOneAndOrHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
+{
+    return ffxSaturate(x * y + z); // the product acts as AND; adding z and passing through ffxSaturate acts as OR
+}
+
+/// Conditional free logic AND operation using two half-precision values followed by
+/// an OR operation using the resulting value and a third half-precision value.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR-ed with the result of the AND operation.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxZeroOneAndOrHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
+{
+    return ffxSaturate(x * y + z); // the product acts as AND; adding z and passing through ffxSaturate acts as OR
+}
+
+/// Conditional free logic AND operation using two half-precision values followed by
+/// an OR operation using the resulting value and a third half-precision value.
+///
+/// @param [in] x           The first value to be fed into the AND operator.
+/// @param [in] y           The second value to be fed into the AND operator.
+/// @param [in] z           The value to be OR-ed with the result of the AND operation.
+///
+/// @returns
+/// Result of the AND OR operation.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxZeroOneAndOrHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
+{
+    return ffxSaturate(x * y + z); // the product acts as AND; adding z and passing through ffxSaturate acts as OR
+}
+
+/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPU
+FfxFloat16 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16 x)
+{
+    return ffxSaturate(x * FFX_BROADCAST_FLOAT16(FFX_POSITIVE_INFINITY_HALF)); // scale by FFX_POSITIVE_INFINITY_HALF, then saturate; NOTE(review): confirm the x == 0 case (presumably 0 * infinity) saturates to 0
+}
+
+/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x2 x)
+{
+    return ffxSaturate(x * FFX_BROADCAST_FLOAT16X2(FFX_POSITIVE_INFINITY_HALF)); // scale by FFX_POSITIVE_INFINITY_HALF, then saturate; NOTE(review): confirm the x == 0 case (presumably 0 * infinity) saturates to 0
+}
+
+/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x3 x)
+{
+    return ffxSaturate(x * FFX_BROADCAST_FLOAT16X3(FFX_POSITIVE_INFINITY_HALF)); // scale by FFX_POSITIVE_INFINITY_HALF, then saturate; NOTE(review): confirm the x == 0 case (presumably 0 * infinity) saturates to 0
+}
+
+/// Given a half-precision value, returns 1.0 if greater than zero and 0.0 if not.
+///
+/// @param [in] x           The value to be compared.
+///
+/// @returns
+/// Result of the greater than zero comparison.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxZeroOneIsGreaterThanZeroHalf(FfxFloat16x4 x)
+{
+    return ffxSaturate(x * FFX_BROADCAST_FLOAT16X4(FFX_POSITIVE_INFINITY_HALF)); // scale by FFX_POSITIVE_INFINITY_HALF, then saturate; NOTE(review): confirm the x == 0 case (presumably 0 * infinity) saturates to 0
+}
+
+/// Conditional free logic NOT operation on a half-precision value.
+///
+/// @param [in] x           The value to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, evaluated as <c>1 - x</c>.
+///
+/// @ingroup GPU
+FfxFloat16 ffxZeroOneNotHalf(FfxFloat16 x)
+{
+    return FFX_BROADCAST_FLOAT16(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on half-precision values.
+///
+/// @param [in] x           The values to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, evaluated as <c>1 - x</c>.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxZeroOneNotHalf(FfxFloat16x2 x)
+{
+    return FFX_BROADCAST_FLOAT16X2(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on half-precision values.
+///
+/// @param [in] x           The values to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, evaluated as <c>1 - x</c>.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxZeroOneNotHalf(FfxFloat16x3 x)
+{
+    return FFX_BROADCAST_FLOAT16X3(1.0) - x;
+}
+
+/// Conditional free logic NOT operation on half-precision values.
+///
+/// @param [in] x           The values to be fed into the NOT operator.
+///
+/// @returns
+/// Result of the NOT operation, evaluated as <c>1 - x</c>.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxZeroOneNotHalf(FfxFloat16x4 x)
+{
+    return FFX_BROADCAST_FLOAT16X4(1.0) - x;
+}
+
+/// Conditional free logic OR operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPU
+FfxFloat16 ffxZeroOneOrHalf(FfxFloat16 x, FfxFloat16 y)
+{
+    return max(x, y); // for operands that are 0.0 or 1.0, max() gives the logical OR
+}
+
+/// Conditional free logic OR operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxZeroOneOrHalf(FfxFloat16x2 x, FfxFloat16x2 y)
+{
+    return max(x, y); // for operands that are 0.0 or 1.0, max() gives the logical OR
+}
+
+/// Conditional free logic OR operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxZeroOneOrHalf(FfxFloat16x3 x, FfxFloat16x3 y)
+{
+    return max(x, y); // for operands that are 0.0 or 1.0, max() gives the logical OR
+}
+
+/// Conditional free logic OR operation using two half-precision values.
+///
+/// @param [in] x           The first value to be fed into the OR operator.
+/// @param [in] y           The second value to be fed into the OR operator.
+///
+/// @returns
+/// Result of the OR operation.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxZeroOneOrHalf(FfxFloat16x4 x, FfxFloat16x4 y)
+{
+    return max(x, y); // for operands that are 0.0 or 1.0, max() gives the logical OR
+}
+
+/// Choose between two half-precision values depending on whether the first parameter is greater than zero.
+///
+/// @param [in] x           The value to compare against zero.
+/// @param [in] y           The value to return if the comparison is greater than zero.
+/// @param [in] z           The value to return if the comparison is less than or equal to zero.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPU
+FfxFloat16 ffxZeroOneSelectHalf(FfxFloat16 x, FfxFloat16 y, FfxFloat16 z)
+{
+    FfxFloat16 r = (-x) * z + z; // r = z * (1 - x)
+    return x * y + r; // x * y + (1 - x) * z: yields y when x is 1 and z when x is 0
+}
+
+/// Choose between two half-precision values depending on whether the first parameter is greater than zero.
+///
+/// @param [in] x           The value to compare against zero.
+/// @param [in] y           The value to return if the comparison is greater than zero.
+/// @param [in] z           The value to return if the comparison is less than or equal to zero.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxZeroOneSelectHalf(FfxFloat16x2 x, FfxFloat16x2 y, FfxFloat16x2 z)
+{
+    FfxFloat16x2 r = (-x) * z + z; // r = z * (1 - x)
+    return x * y + r; // x * y + (1 - x) * z: yields y when x is 1 and z when x is 0
+}
+
+/// Choose between two half-precision values depending on whether the first parameter is greater than zero.
+///
+/// @param [in] x           The value to compare against zero.
+/// @param [in] y           The value to return if the comparison is greater than zero.
+/// @param [in] z           The value to return if the comparison is less than or equal to zero.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxZeroOneSelectHalf(FfxFloat16x3 x, FfxFloat16x3 y, FfxFloat16x3 z)
+{
+    FfxFloat16x3 r = (-x) * z + z; // r = z * (1 - x)
+    return x * y + r; // x * y + (1 - x) * z: yields y when x is 1 and z when x is 0
+}
+
+/// Choose between two half-precision values depending on whether the first parameter is greater than zero.
+///
+/// @param [in] x           The value to compare against zero.
+/// @param [in] y           The value to return if the comparison is greater than zero.
+/// @param [in] z           The value to return if the comparison is less than or equal to zero.
+///
+/// @returns
+/// The selected value.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxZeroOneSelectHalf(FfxFloat16x4 x, FfxFloat16x4 y, FfxFloat16x4 z)
+{
+    FfxFloat16x4 r = (-x) * z + z; // r = z * (1 - x)
+    return x * y + r; // x * y + (1 - x) * z: yields y when x is 1 and z when x is 0
+}
+
+/// Convert the sign of a half-precision value into a flag: 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x           The value whose sign is tested.
+///
+/// @returns The product of <c><i>x</i></c> and negative infinity, passed through <c><i>ffxSaturate</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16 ffxZeroOneIsSignedHalf(FfxFloat16 x)
+{
+    FfxFloat16 negativeInfinity = FFX_BROADCAST_FLOAT16(FFX_NEGATIVE_INFINITY_HALF);
+    return ffxSaturate(x * negativeInfinity);
+}
+
+/// Convert the sign of each half-precision component into a flag: 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x           The value whose sign is tested.
+///
+/// @returns The product of <c><i>x</i></c> and negative infinity, passed through <c><i>ffxSaturate</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxZeroOneIsSignedHalf(FfxFloat16x2 x)
+{
+    FfxFloat16x2 negativeInfinity = FFX_BROADCAST_FLOAT16X2(FFX_NEGATIVE_INFINITY_HALF);
+    return ffxSaturate(x * negativeInfinity);
+}
+
+/// Convert the sign of each half-precision component into a flag: 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x           The value whose sign is tested.
+///
+/// @returns The product of <c><i>x</i></c> and negative infinity, passed through <c><i>ffxSaturate</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxZeroOneIsSignedHalf(FfxFloat16x3 x)
+{
+    FfxFloat16x3 negativeInfinity = FFX_BROADCAST_FLOAT16X3(FFX_NEGATIVE_INFINITY_HALF);
+    return ffxSaturate(x * negativeInfinity);
+}
+
+/// Convert the sign of each half-precision component into a flag: 1.0 if less than zero and 0.0 if not.
+///
+/// @param [in] x           The value whose sign is tested.
+///
+/// @returns The product of <c><i>x</i></c> and negative infinity, passed through <c><i>ffxSaturate</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x4 ffxZeroOneIsSignedHalf(FfxFloat16x4 x)
+{
+    FfxFloat16x4 negativeInfinity = FFX_BROADCAST_FLOAT16X4(FFX_NEGATIVE_INFINITY_HALF);
+    return ffxSaturate(x * negativeInfinity);
+}
+
+/// Encode a linear half-precision value with the Rec.709 transfer curve (used for some HDTVs).
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// The result is the constant 0.018 * 4.5 clamped between the linear segment and the power segment.
+///
+/// @param [in] c           The linear color to convert to Rec.709.
+///
+/// @returns The <c><i>color</i></c> in Rec.709 space.
+///
+/// @ingroup GPU
+FfxFloat16 ffxRec709FromLinearHalf(FfxFloat16 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);  // {clamped constant, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.099, -0.099);         // {scale, offset} of the power segment
+    FfxFloat16 powerSegment = pow(c, seg.z) * curve.x + curve.y;
+    return clamp(seg.x, c * seg.y, powerSegment);
+}
+
+/// Encode linear half-precision values with the Rec.709 transfer curve (used for some HDTVs).
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// The result is the constant 0.018 * 4.5 clamped between the linear segment and the power segment.
+///
+/// @param [in] c           The linear color to convert to Rec.709.
+///
+/// @returns The <c><i>color</i></c> in Rec.709 space.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxRec709FromLinearHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);  // {clamped constant, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.099, -0.099);         // {scale, offset} of the power segment
+    FfxFloat16x2 powerSegment = pow(c, seg.zz) * curve.xx + curve.yy;
+    return clamp(seg.xx, c * seg.yy, powerSegment);
+}
+
+/// Encode linear half-precision values with the Rec.709 transfer curve (used for some HDTVs).
+///
+/// Both Rec.709 and sRGB have a linear segment which as spec'ed would intersect the curved segment 2 times.
+///  (a.) For 8-bit sRGB, steps {0 to 10.3} are in the linear region (4% of the encoding range).
+///  (b.) For 8-bit  709, steps {0 to 20.7} are in the linear region (8% of the encoding range).
+///
+/// The result is the constant 0.018 * 4.5 clamped between the linear segment and the power segment.
+///
+/// @param [in] c           The linear color to convert to Rec.709.
+///
+/// @returns The <c><i>color</i></c> in Rec.709 space.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxRec709FromLinearHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.018 * 4.5, 4.5, 0.45);  // {clamped constant, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.099, -0.099);         // {scale, offset} of the power segment
+    FfxFloat16x3 powerSegment = pow(c, seg.zzz) * curve.xxx + curve.yyy;
+    return clamp(seg.xxx, c * seg.yyy, powerSegment);
+}
+
+/// Apply a gamma curve to a linear half-precision value.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGammaHalf</i></c>.
+///
+/// @param [in] c              The value to convert to gamma space from linear.
+/// @param [in] rcpX           The reciprocal of power value used for the gamma curve.
+///
+/// @returns <c><i>c</i></c> raised to the power <c><i>rcpX</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16 ffxGammaFromLinearHalf(FfxFloat16 c, FfxFloat16 rcpX)
+{
+    FfxFloat16 exponent = FFX_BROADCAST_FLOAT16(rcpX);
+    return pow(c, exponent);
+}
+
+/// Apply a gamma curve to each component of a linear half-precision value.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGammaHalf</i></c>.
+///
+/// @param [in] c              The value to convert to gamma space from linear.
+/// @param [in] rcpX           The reciprocal of power value used for the gamma curve.
+///
+/// @returns <c><i>c</i></c> raised to the power <c><i>rcpX</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxGammaFromLinearHalf(FfxFloat16x2 c, FfxFloat16 rcpX)
+{
+    FfxFloat16x2 exponent = FFX_BROADCAST_FLOAT16X2(rcpX);
+    return pow(c, exponent);
+}
+
+/// Apply a gamma curve to each component of a linear half-precision value.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// Note: 'rcpX' is '1/x', where the 'x' is what would be used in <c><i>ffxLinearFromGammaHalf</i></c>.
+///
+/// @param [in] c              The value to convert to gamma space from linear.
+/// @param [in] rcpX           The reciprocal of power value used for the gamma curve.
+///
+/// @returns <c><i>c</i></c> raised to the power <c><i>rcpX</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxGammaFromLinearHalf(FfxFloat16x3 c, FfxFloat16 rcpX)
+{
+    FfxFloat16x3 exponent = FFX_BROADCAST_FLOAT16X3(rcpX);
+    return pow(c, exponent);
+}
+
+/// Encode a linear half-precision value with the SRGB transfer curve.
+///
+/// @param [in] c           The value to convert to SRGB from linear.
+///
+/// @returns The constant 0.0031308 * 12.92 clamped between the linear and the power segment of the curve.
+///
+/// @ingroup GPU
+FfxFloat16 ffxSrgbFromLinearHalf(FfxFloat16 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);  // {clamped constant, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.055, -0.055);                      // {scale, offset} of the power segment
+    FfxFloat16 powerSegment = pow(c, seg.z) * curve.x + curve.y;
+    return clamp(seg.x, c * seg.y, powerSegment);
+}
+
+/// Encode linear half-precision values with the SRGB transfer curve.
+///
+/// @param [in] c           The value to convert to SRGB from linear.
+///
+/// @returns The constant 0.0031308 * 12.92 clamped between the linear and the power segment of the curve.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxSrgbFromLinearHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);  // {clamped constant, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.055, -0.055);                      // {scale, offset} of the power segment
+    FfxFloat16x2 powerSegment = pow(c, seg.zz) * curve.xx + curve.yy;
+    return clamp(seg.xx, c * seg.yy, powerSegment);
+}
+
+/// Encode linear half-precision values with the SRGB transfer curve.
+///
+/// @param [in] c           The value to convert to SRGB from linear.
+///
+/// @returns The constant 0.0031308 * 12.92 clamped between the linear and the power segment of the curve.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxSrgbFromLinearHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.0031308 * 12.92, 12.92, 1.0 / 2.4);  // {clamped constant, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.055, -0.055);                      // {scale, offset} of the power segment
+    FfxFloat16x3 powerSegment = pow(c, seg.zzz) * curve.xxx + curve.yyy;
+    return clamp(seg.xxx, c * seg.yyy, powerSegment);
+}
+
+/// Take the square root of a half-precision value.
+///
+/// @param [in] c           The value to compute the square root for.
+///
+/// @returns The square root of <c><i>c</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16 ffxSquareRootHalf(FfxFloat16 c)
+{
+    FfxFloat16 root = sqrt(c);
+    return root;
+}
+
+/// Take the square root of each component of a half-precision value.
+///
+/// @param [in] c           The value to compute the square root for.
+///
+/// @returns The square root of <c><i>c</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxSquareRootHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x2 root = sqrt(c);
+    return root;
+}
+
+/// Take the square root of each component of a half-precision value.
+///
+/// @param [in] c           The value to compute the square root for.
+///
+/// @returns The square root of <c><i>c</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxSquareRootHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 root = sqrt(c);
+    return root;
+}
+
+/// Take the cube root of a half-precision value.
+///
+/// @param [in] c           The value to compute the cube root for.
+///
+/// @returns <c><i>c</i></c> raised to the power 1.0 / 3.0.
+///
+/// @ingroup GPU
+FfxFloat16 ffxCubeRootHalf(FfxFloat16 c)
+{
+    FfxFloat16 oneThird = FFX_BROADCAST_FLOAT16(1.0 / 3.0);
+    return pow(c, oneThird);
+}
+
+/// Take the cube root of each component of a half-precision value.
+///
+/// @param [in] c           The value to compute the cube root for.
+///
+/// @returns <c><i>c</i></c> raised to the power 1.0 / 3.0.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxCubeRootHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x2 oneThird = FFX_BROADCAST_FLOAT16X2(1.0 / 3.0);
+    return pow(c, oneThird);
+}
+
+/// Take the cube root of each component of a half-precision value.
+///
+/// @param [in] c           The value to compute the cube root for.
+///
+/// @returns <c><i>c</i></c> raised to the power 1.0 / 3.0.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxCubeRootHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 oneThird = FFX_BROADCAST_FLOAT16X3(1.0 / 3.0);
+    return pow(c, oneThird);
+}
+
+/// Decode a REC.709 half-precision value to linear space.
+///
+/// @param [in] c           The value to convert to linear from REC.709.
+///
+/// @returns The linear segment result where <c><i>c</i></c> is below the breakpoint, otherwise the power segment result.
+///
+/// @ingroup GPU
+FfxFloat16 ffxLinearFromRec709Half(FfxFloat16 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);  // {breakpoint, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);        // {scale, offset} of the power segment
+    FfxFloat16 belowBreak = ffxZeroOneIsSignedHalf(c - seg.x);  // NOTE(review): seg.x is tested against the encoded input; confirm 0.081 / 4.5 is intended rather than 0.081.
+    return ffxZeroOneSelectHalf(belowBreak, c * seg.y, pow(c * curve.x + curve.y, seg.z));
+}
+
+/// Decode REC.709 half-precision values to linear space.
+///
+/// @param [in] c           The value to convert to linear from REC.709.
+///
+/// @returns The linear segment result where <c><i>c</i></c> is below the breakpoint, otherwise the power segment result.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxLinearFromRec709Half(FfxFloat16x2 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);  // {breakpoint, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);        // {scale, offset} of the power segment
+    FfxFloat16x2 belowBreak = ffxZeroOneIsSignedHalf(c - seg.xx);  // NOTE(review): seg.x is tested against the encoded input; confirm 0.081 / 4.5 is intended rather than 0.081.
+    return ffxZeroOneSelectHalf(belowBreak, c * seg.yy, pow(c * curve.xx + curve.yy, seg.zz));
+}
+
+/// Decode REC.709 half-precision values to linear space.
+///
+/// @param [in] c           The value to convert to linear from REC.709.
+///
+/// @returns The linear segment result where <c><i>c</i></c> is below the breakpoint, otherwise the power segment result.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxLinearFromRec709Half(FfxFloat16x3 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.081 / 4.5, 1.0 / 4.5, 1.0 / 0.45);  // {breakpoint, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.0 / 1.099, 0.099 / 1.099);        // {scale, offset} of the power segment
+    FfxFloat16x3 belowBreak = ffxZeroOneIsSignedHalf(c - seg.xxx);  // NOTE(review): seg.x is tested against the encoded input; confirm 0.081 / 4.5 is intended rather than 0.081.
+    return ffxZeroOneSelectHalf(belowBreak, c * seg.yyy, pow(c * curve.xxx + curve.yyy, seg.zzz));
+}
+
+/// Convert a gamma-space half-precision value to linear space.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] c           The gamma-space value to convert to linear.
+/// @param [in] x           The power value used for the gamma curve.
+///
+/// @returns <c><i>c</i></c> raised to the power <c><i>x</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16 ffxLinearFromGammaHalf(FfxFloat16 c, FfxFloat16 x)
+{
+    FfxFloat16 exponent = FFX_BROADCAST_FLOAT16(x);
+    return pow(c, exponent);
+}
+
+/// Convert each component of a gamma-space half-precision value to linear space.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] c           The gamma-space value to convert to linear.
+/// @param [in] x           The power value used for the gamma curve.
+///
+/// @returns <c><i>c</i></c> raised to the power <c><i>x</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxLinearFromGammaHalf(FfxFloat16x2 c, FfxFloat16 x)
+{
+    FfxFloat16x2 exponent = FFX_BROADCAST_FLOAT16X2(x);
+    return pow(c, exponent);
+}
+
+/// Convert each component of a gamma-space half-precision value to linear space.
+///
+/// Typically 2.2 for some PC displays, or 2.4-2.5 for CRTs, or 2.2 FreeSync2 native.
+///
+/// @param [in] c           The gamma-space value to convert to linear.
+/// @param [in] x           The power value used for the gamma curve.
+///
+/// @returns <c><i>c</i></c> raised to the power <c><i>x</i></c>.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxLinearFromGammaHalf(FfxFloat16x3 c, FfxFloat16 x)
+{
+    FfxFloat16x3 exponent = FFX_BROADCAST_FLOAT16X3(x);
+    return pow(c, exponent);
+}
+
+/// Decode an SRGB-encoded half-precision value to linear space.
+///
+/// Inputs below the breakpoint use the linear segment; all other inputs use the power segment.
+///
+/// @param [in] c           The SRGB-space value to convert to linear.
+///
+/// @returns A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat16 ffxLinearFromSrgbHalf(FfxFloat16 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);  // {breakpoint, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);       // {scale, offset} of the power segment
+    FfxFloat16 belowBreak = ffxZeroOneIsSignedHalf(c - seg.x);  // NOTE(review): seg.x is tested against the encoded input; confirm 0.04045 / 12.92 is intended rather than 0.04045.
+    return ffxZeroOneSelectHalf(belowBreak, c * seg.y, pow(c * curve.x + curve.y, seg.z));
+}
+
+/// Decode SRGB-encoded half-precision values to linear space.
+///
+/// Inputs below the breakpoint use the linear segment; all other inputs use the power segment.
+///
+/// @param [in] c           The SRGB-space value to convert to linear.
+///
+/// @returns A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat16x2 ffxLinearFromSrgbHalf(FfxFloat16x2 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);  // {breakpoint, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);       // {scale, offset} of the power segment
+    FfxFloat16x2 belowBreak = ffxZeroOneIsSignedHalf(c - seg.xx);  // NOTE(review): seg.x is tested against the encoded input; confirm 0.04045 / 12.92 is intended rather than 0.04045.
+    return ffxZeroOneSelectHalf(belowBreak, c * seg.yy, pow(c * curve.xx + curve.yy, seg.zz));
+}
+
+/// Decode SRGB-encoded half-precision values to linear space.
+///
+/// Inputs below the breakpoint use the linear segment; all other inputs use the power segment.
+///
+/// @param [in] c           The SRGB-space value to convert to linear.
+///
+/// @returns A value in linear space.
+///
+/// @ingroup GPU
+FfxFloat16x3 ffxLinearFromSrgbHalf(FfxFloat16x3 c)
+{
+    FfxFloat16x3 seg = FfxFloat16x3(0.04045 / 12.92, 1.0 / 12.92, 2.4);  // {breakpoint, linear slope, power}
+    FfxFloat16x2 curve = FfxFloat16x2(1.0 / 1.055, 0.055 / 1.055);       // {scale, offset} of the power segment
+    FfxFloat16x3 belowBreak = ffxZeroOneIsSignedHalf(c - seg.xxx);  // NOTE(review): seg.x is tested against the encoded input; confirm 0.04045 / 12.92 is intended rather than 0.04045.
+    return ffxZeroOneSelectHalf(belowBreak, c * seg.yyy, pow(c * curve.xxx + curve.yyy, seg.zzz));
+}
+
+/// Remap a 64x1 index to 8x8 coordinates, imposing rotated 2x2 pixel quads in quad linear.
+///
+///  543210
+///  ======
+///  ..xxx.
+///  yy...y
+///
+/// @param [in] a       The input 1D coordinates to remap.
+/// @returns The remapped 2D coordinates.
+///
+/// @ingroup GPU
+FfxUInt16x2 ffxRemapForQuadHalf(FfxUInt32 a)
+{
+    FfxUInt32 x = bitfieldExtract(a, 1u, 3u);
+    FfxUInt32 y = bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), a, 1u);
+    return FfxUInt16x2(x, y);
+}
+
+/// Remap a 64x1 lane index to 8x8 coordinates, as needed for 2D wave reductions.
+///
+/// The 64-wide lane indices to 8x8 remapping is performed as follows:
+///
+///     00 01 08 09 10 11 18 19
+///     02 03 0a 0b 12 13 1a 1b
+///     04 05 0c 0d 14 15 1c 1d
+///     06 07 0e 0f 16 17 1e 1f
+///     20 21 28 29 30 31 38 39
+///     22 23 2a 2b 32 33 3a 3b
+///     24 25 2c 2d 34 35 3c 3d
+///     26 27 2e 2f 36 37 3e 3f
+///
+/// @param [in] a       The input 1D coordinate to remap.
+/// @returns The remapped 2D coordinates.
+///
+/// @ingroup GPU
+FfxUInt16x2 ffxRemapForWaveReductionHalf(FfxUInt32 a)
+{
+    FfxUInt32 x = bitfieldInsertMask(bitfieldExtract(a, 2u, 3u), a, 1u);
+    FfxUInt32 y = bitfieldInsertMask(bitfieldExtract(a, 3u, 3u), bitfieldExtract(a, 1u, 2u), 2u);
+    return FfxUInt16x2(x, y);
+}
+
+#endif  // FFX_HALF
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h b/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
new file mode 100644
index 0000000000..ad4ff6552d
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_hlsl.h
@@ -0,0 +1,1502 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+/// A define for abstracting shared memory between shading languages (expands to <c><i>groupshared</i></c> here).
+///
+/// @ingroup GPU
+#define FFX_GROUPSHARED groupshared
+
+/// A define for abstracting compute memory barriers between shading languages (expands to <c><i>GroupMemoryBarrierWithGroupSync</i></c> here).
+///
+/// @ingroup GPU
+#define FFX_GROUP_MEMORY_BARRIER GroupMemoryBarrierWithGroupSync
+
+/// A define added to accept static markup on functions to aid CPU/GPU portability of code (expands to <c><i>static</i></c> here).
+///
+/// @ingroup GPU
+#define FFX_STATIC static
+
+/// A define for abstracting loop unrolling between shading languages (expands to the <c><i>[unroll]</i></c> attribute here).
+///
+/// @ingroup GPU
+#define FFX_UNROLL [unroll]
+
+/// A define for abstracting a 'greater than' comparison between two values; arguments and result are parenthesized.
+///
+/// @ingroup GPU
+#define FFX_GREATER_THAN(x, y) ((x) > (y))
+
+/// A define for abstracting a 'greater than or equal' comparison between two values; arguments and result are parenthesized.
+///
+/// @ingroup GPU
+#define FFX_GREATER_THAN_EQUAL(x, y) ((x) >= (y))
+
+/// A define for abstracting a 'less than' comparison between two values; arguments and result are parenthesized.
+///
+/// @ingroup GPU
+#define FFX_LESS_THAN(x, y) ((x) < (y))
+
+/// A define for abstracting a 'less than or equal' comparison between two values; arguments and result are parenthesized.
+///
+/// @ingroup GPU
+#define FFX_LESS_THAN_EQUAL(x, y) ((x) <= (y))
+
+/// A define for abstracting an 'equal' comparison between two values; arguments and result are parenthesized.
+///
+/// @ingroup GPU
+#define FFX_EQUAL(x, y) ((x) == (y))
+
+/// A define for abstracting a 'not equal' comparison between two values; arguments and result are parenthesized.
+///
+/// @ingroup GPU
+#define FFX_NOT_EQUAL(x, y) ((x) != (y))
+
+/// Broadcast a scalar value to a 1-dimensional floating point vector (expands to a scalar FfxFloat32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_FLOAT32(x) FfxFloat32(x)
+
+/// Broadcast a scalar value to a 2-dimensional floating point vector (expands to a scalar FfxFloat32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_FLOAT32X2(x) FfxFloat32(x)
+
+/// Broadcast a scalar value to a 3-dimensional floating point vector (expands to a scalar FfxFloat32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_FLOAT32X3(x) FfxFloat32(x)
+
+/// Broadcast a scalar value to a 4-dimensional floating point vector (expands to a scalar FfxFloat32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_FLOAT32X4(x) FfxFloat32(x)
+
+/// Broadcast a scalar value to a 1-dimensional unsigned integer vector (expands to a scalar FfxUInt32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_UINT32(x) FfxUInt32(x)
+
+/// Broadcast a scalar value to a 2-dimensional unsigned integer vector (expands to a scalar FfxUInt32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_UINT32X2(x) FfxUInt32(x)
+
+/// Broadcast a scalar value to a 3-dimensional unsigned integer vector (expands to a scalar FfxUInt32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_UINT32X3(x) FfxUInt32(x)
+
+/// Broadcast a scalar value to a 4-dimensional unsigned integer vector (expands to a scalar FfxUInt32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_UINT32X4(x) FfxUInt32(x)
+
+/// Broadcast a scalar value to a 1-dimensional signed integer vector (expands to a scalar FfxInt32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_INT32(x) FfxInt32(x)
+
+/// Broadcast a scalar value to a 2-dimensional signed integer vector (expands to a scalar FfxInt32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_INT32X2(x) FfxInt32(x)
+
+/// Broadcast a scalar value to a 3-dimensional signed integer vector (expands to a scalar FfxInt32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_INT32X3(x) FfxInt32(x)
+
+/// Broadcast a scalar value to a 4-dimensional signed integer vector (expands to a scalar FfxInt32 cast).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_INT32X4(x) FfxInt32(x)
+
+/// Broadcast a scalar value to a 1-dimensional half-precision floating point vector (expands to FFX_MIN16_F).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_FLOAT16(a)   FFX_MIN16_F(a)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision floating point vector (expands to FFX_MIN16_F).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_FLOAT16X2(a) FFX_MIN16_F(a)
+
+/// Broadcast a scalar value to a 3-dimensional half-precision floating point vector (expands to FFX_MIN16_F).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_FLOAT16X3(a) FFX_MIN16_F(a)
+
+/// Broadcast a scalar value to a 4-dimensional half-precision floating point vector (expands to FFX_MIN16_F).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_FLOAT16X4(a) FFX_MIN16_F(a)
+
+/// Broadcast a scalar value to a 1-dimensional half-precision unsigned integer vector (expands to FFX_MIN16_U).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_UINT16(a)   FFX_MIN16_U(a)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision unsigned integer vector (expands to FFX_MIN16_U).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_UINT16X2(a) FFX_MIN16_U(a)
+
+/// Broadcast a scalar value to a 3-dimensional half-precision unsigned integer vector (expands to FFX_MIN16_U).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_UINT16X3(a) FFX_MIN16_U(a)
+
+/// Broadcast a scalar value to a 4-dimensional half-precision unsigned integer vector (expands to FFX_MIN16_U).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_UINT16X4(a) FFX_MIN16_U(a)
+
+/// Broadcast a scalar value to a 1-dimensional half-precision signed integer vector (expands to FFX_MIN16_I).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_INT16(a)   FFX_MIN16_I(a)
+
+/// Broadcast a scalar value to a 2-dimensional half-precision signed integer vector (expands to FFX_MIN16_I).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_INT16X2(a) FFX_MIN16_I(a)
+
+/// Broadcast a scalar value to a 3-dimensional half-precision signed integer vector (expands to FFX_MIN16_I).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_INT16X3(a) FFX_MIN16_I(a)
+
+/// Broadcast a scalar value to a 4-dimensional half-precision signed integer vector (expands to FFX_MIN16_I).
+///
+/// @ingroup GPU
+#define FFX_BROADCAST_MIN_INT16X4(a) FFX_MIN16_I(a)
+
+/// Pack two 32-bit floating point values into a single 32-bit unsigned integer.
+///
+/// Each component of <c><i>value</i></c> is converted with <c><i>f32tof16</i></c>; the X result is stored
+/// in the lower 16 bits and the Y result in the upper 16 bits of the returned value.
+///
+/// @param [in] value               A 2-dimensional floating point value to convert and pack.
+///
+/// @returns A packed 32bit value containing 2 16bit floating point values.
+///
+/// @ingroup HLSL
+FfxUInt32 packHalf2x16(FfxFloat32x2 value)
+{
+    FfxUInt32 lowerHalf = f32tof16(value.x);
+    FfxUInt32 upperHalf = f32tof16(value.y) << 16;
+    return lowerHalf | upperHalf;
+}
+
+/// Replicate a scalar into every component of a 2-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns A 2-dimensional floating point vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSL
+FfxFloat32x2 ffxBroadcast2(FfxFloat32 value)
+{
+    FfxFloat32x2 splat = FfxFloat32x2(value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 3-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns A 3-dimensional floating point vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSL
+FfxFloat32x3 ffxBroadcast3(FfxFloat32 value)
+{
+    FfxFloat32x3 splat = FfxFloat32x3(value, value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 4-dimensional floating point vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns A 4-dimensional floating point vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSL
+FfxFloat32x4 ffxBroadcast4(FfxFloat32 value)
+{
+    FfxFloat32x4 splat = FfxFloat32x4(value, value, value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 2-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns A 2-dimensional signed integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSL
+FfxInt32x2 ffxBroadcast2(FfxInt32 value)
+{
+    FfxInt32x2 splat = FfxInt32x2(value, value);
+    return splat;
+}
+
+/// Broadcast a scalar value to a 3-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns
+/// A 3-dimensional signed integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSL
+FfxInt32x3 ffxBroadcast3(FfxInt32 value)
+{
+    return FfxInt32x3(value, value, value);
+}
+
+/// Replicate a scalar into every component of a 4-dimensional signed integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns A 4-dimensional signed integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSL
+FfxInt32x4 ffxBroadcast4(FfxInt32 value)
+{
+    FfxInt32x4 splat = FfxInt32x4(value, value, value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 2-dimensional unsigned integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns A 2-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSL
+FfxUInt32x2 ffxBroadcast2(FfxUInt32 value)
+{
+    FfxUInt32x2 splat = FfxUInt32x2(value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 3-dimensional unsigned integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns A 3-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSL
+FfxUInt32x3 ffxBroadcast3(FfxUInt32 value)
+{
+    FfxUInt32x3 splat = FfxUInt32x3(value, value, value);
+    return splat;
+}
+
+/// Replicate a scalar into every component of a 4-dimensional unsigned integer vector.
+///
+/// @param [in] value               The value to broadcast.
+///
+/// @returns A 4-dimensional unsigned integer vector with <c><i>value</i></c> in each component.
+///
+/// @ingroup HLSL
+FfxUInt32x4 ffxBroadcast4(FfxUInt32 value)
+{
+    FfxUInt32x4 splat = FfxUInt32x4(value, value, value, value);
+    return splat;
+}
+
+FfxUInt32 bitfieldExtract(FfxUInt32 src, FfxUInt32 off, FfxUInt32 bits)
+{
+    // Shift the field down to bit 0, then keep only its lowest 'bits' bits.
+    return (src >> off) & ((1u << bits) - 1);
+}
+
+FfxUInt32 bitfieldInsert(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 mask)
+{
+    return (src & (~mask)) | (ins & mask);  // bits set in 'mask' come from 'ins', all others from 'src'
+}
+
+FfxUInt32 bitfieldInsertMask(FfxUInt32 src, FfxUInt32 ins, FfxUInt32 bits)
+{
+    // Build a mask of the lowest 'bits' bits, then take those bits from 'ins' and the rest from 'src'.
+    return bitfieldInsert(src, ins, (1u << bits) - 1);
+}
+
+/// Reinterpret the bit pattern of a floating-point value as an unsigned integer.
+///
+/// @param [in] x                   The input value.
+///
+/// @returns The result of the HLSL <c><i>asuint</i></c> intrinsic applied to <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32 ffxAsUInt32(FfxFloat32 x)
+{
+    FfxUInt32 bits = asuint(x);
+    return bits;
+}
+
+/// Reinterpret the bit pattern of a 2-dimensional floating-point value as unsigned integers.
+///
+/// @param [in] x                   The input value.
+///
+/// @returns The result of the HLSL <c><i>asuint</i></c> intrinsic applied to <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32x2 ffxAsUInt32(FfxFloat32x2 x)
+{
+    FfxUInt32x2 bits = asuint(x);
+    return bits;
+}
+
+/// Reinterpret the bit pattern of a 3-dimensional floating-point value as unsigned integers.
+///
+/// @param [in] x                   The input value.
+///
+/// @returns The result of the HLSL <c><i>asuint</i></c> intrinsic applied to <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32x3 ffxAsUInt32(FfxFloat32x3 x)
+{
+    FfxUInt32x3 bits = asuint(x);
+    return bits;
+}
+
+/// Reinterpret the bit pattern of a 4-dimensional floating-point value as unsigned integers.
+///
+/// @param [in] x                   The input value.
+///
+/// @returns The result of the HLSL <c><i>asuint</i></c> intrinsic applied to <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32x4 ffxAsUInt32(FfxFloat32x4 x)
+{
+    FfxUInt32x4 bits = asuint(x);
+    return bits;
+}
+
+/// Reinterpret the bit pattern of an unsigned integer as a floating-point number.
+///
+/// @param [in] x                   The input value.
+///
+/// @returns The result of the HLSL <c><i>asfloat</i></c> intrinsic applied to <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32 ffxAsFloat(FfxUInt32 x)
+{
+    FfxFloat32 reinterpreted = asfloat(x);
+    return reinterpreted;
+}
+
+/// Reinterpret the bit pattern of a 2-dimensional unsigned integer value as floating-point numbers.
+///
+/// @param [in] x                   The input value.
+///
+/// @returns The result of the HLSL <c><i>asfloat</i></c> intrinsic applied to <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x2 ffxAsFloat(FfxUInt32x2 x)
+{
+    FfxFloat32x2 reinterpreted = asfloat(x);
+    return reinterpreted;
+}
+
+/// Reinterpret the bit pattern of a 3-dimensional unsigned integer value as floating-point numbers.
+///
+/// @param [in] x                   The input value.
+///
+/// @returns The result of the HLSL <c><i>asfloat</i></c> intrinsic applied to <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x3 ffxAsFloat(FfxUInt32x3 x)
+{
+    FfxFloat32x3 reinterpreted = asfloat(x);
+    return reinterpreted;
+}
+
+/// Reinterpret the bit pattern of a 4-dimensional unsigned integer value as floating-point numbers.
+///
+/// @param [in] x                   The input value.
+///
+/// @returns The result of the HLSL <c><i>asfloat</i></c> intrinsic applied to <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x4 ffxAsFloat(FfxUInt32x4 x)
+{
+    FfxFloat32x4 reinterpreted = asfloat(x);
+    return reinterpreted;
+}
+
+/// Linearly interpolate between two values.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements the following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32 ffxLerp(FfxFloat32 x, FfxFloat32 y, FfxFloat32 t)
+{
+    FfxFloat32 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Linearly interpolate between two 2-dimensional values using a single scalar weight.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements the following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32 t)
+{
+    FfxFloat32x2 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Linearly interpolate between two 2-dimensional values using a per-component weight.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements the following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x2 ffxLerp(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 t)
+{
+    FfxFloat32x2 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Linearly interpolate between two 3-dimensional values using a single scalar weight.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements the following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32 t)
+{
+    FfxFloat32x3 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Linearly interpolate between two 3-dimensional values using a per-component weight.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements the following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x3 ffxLerp(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 t)
+{
+    FfxFloat32x3 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Linearly interpolate between two 4-dimensional values using a single scalar weight.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function, which implements the following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32 t)
+{
+    FfxFloat32x4 blended = lerp(x, y, t);
+    return blended;
+}
+
+/// Compute the linear interpolation between two values.
+///
+/// Implemented by calling the HLSL <c><i>lerp</i></c> intrinsic function. Implements the
+/// following math:
+///
+///     (1 - t) * x + t * y
+///
+/// @param [in] x               The first value to lerp between.
+/// @param [in] y               The second value to lerp between.
+/// @param [in] t               The value to determine how much of <c><i>x</i></c> and how much of <c><i>y</i></c>.
+///
+/// @returns
+/// A linearly interpolated value between <c><i>x</i></c> and <c><i>y</i></c> according to <c><i>t</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x4 ffxLerp(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 t)
+{
+    return lerp(x, y, t);
+}
+
+/// Clamp a value to a [0..1] range by calling the HLSL <c><i>saturate</i></c> intrinsic function.
+///
+/// @param [in] x               The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32 ffxSaturate(FfxFloat32 x)
+{
+    return saturate(x);
+}
+
+/// Clamp a value to a [0..1] range by calling the HLSL <c><i>saturate</i></c> intrinsic function.
+///
+/// @param [in] x               The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x2 ffxSaturate(FfxFloat32x2 x)
+{
+    return saturate(x);
+}
+
+/// Clamp a value to a [0..1] range by calling the HLSL <c><i>saturate</i></c> intrinsic function.
+///
+/// @param [in] x               The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x3 ffxSaturate(FfxFloat32x3 x)
+{
+    return saturate(x);
+}
+
+/// Clamp a value to a [0..1] range by calling the HLSL <c><i>saturate</i></c> intrinsic function.
+///
+/// @param [in] x               The value to clamp to [0..1] range.
+///
+/// @returns
+/// The clamped version of <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x4 ffxSaturate(FfxFloat32x4 x)
+{
+    return saturate(x);
+}
+
+/// Compute the fractional part of a decimal value.
+///
+/// This function calculates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the intrinsic HLSL function.
+///
+/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware. It is
+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
+/// function.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32 ffxFract(FfxFloat32 x)
+{
+    return x - floor(x);
+}
+
+/// Compute the fractional part of a decimal value.
+///
+/// This function calculates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the intrinsic HLSL function.
+///
+/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware. It is
+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
+/// function.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x2 ffxFract(FfxFloat32x2 x)
+{
+    return x - floor(x);
+}
+
+/// Compute the fractional part of a decimal value.
+///
+/// This function calculates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the intrinsic HLSL function.
+///
+/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware. It is
+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
+/// function.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x3 ffxFract(FfxFloat32x3 x)
+{
+    return x - floor(x);
+}
+
+/// Compute the fractional part of a decimal value.
+///
+/// This function calculates <c><i>x - floor(x)</i></c>, where <c><i>floor</i></c> is the intrinsic HLSL function.
+///
+/// NOTE: This function should compile down to a single <c><i>V_FRACT_F32</i></c> operation on GCN/RDNA hardware. It is
+/// worth further noting that this function is intentionally distinct from the HLSL <c><i>frac</i></c> intrinsic
+/// function.
+///
+/// @param [in] x               The value to compute the fractional part from.
+///
+/// @returns
+/// The fractional part of <c><i>x</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x4 ffxFract(FfxFloat32x4 x)
+{
+    return x - floor(x);
+}
+
+/// Compute the maximum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32 ffxMax3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the maximum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x2 ffxMax3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the maximum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x3 ffxMax3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the maximum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x4 ffxMax3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the maximum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32 ffxMax3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the maximum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32x2 ffxMax3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the maximum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32x3 ffxMax3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the maximum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MAX3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the max calculation.
+/// @param [in] y               The second value to include in the max calculation.
+/// @param [in] z               The third value to include in the max calculation.
+///
+/// @returns
+/// The maximum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32x4 ffxMax3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
+{
+    return max(x, max(y, z));
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32 ffxMed3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x2 ffxMed3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x3 ffxMed3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x4 ffxMed3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxInt32 ffxMed3(FfxInt32 x, FfxInt32 y, FfxInt32 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+    // Equivalent alternative (unused): return min(max(min(y, z), x), max(y, z));
+    // Equivalent alternative (unused): return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y));
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxInt32x2 ffxMed3(FfxInt32x2 x, FfxInt32x2 y, FfxInt32x2 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+    // Equivalent alternative (unused): return min(max(min(y, z), x), max(y, z));
+    // Equivalent alternative (unused): return max(max(x, y), z) == x ? max(y, z) : (max(max(x, y), z) == y ? max(x, z) : max(x, y));
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxInt32x3 ffxMed3(FfxInt32x3 x, FfxInt32x3 y, FfxInt32x3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the median of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MED3_I32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the median calculation.
+/// @param [in] y               The second value to include in the median calculation.
+/// @param [in] z               The third value to include in the median calculation.
+///
+/// @returns
+/// The median value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxInt32x4 ffxMed3(FfxInt32x4 x, FfxInt32x4 y, FfxInt32x4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32 ffxMin3(FfxFloat32 x, FfxFloat32 y, FfxFloat32 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x2 ffxMin3(FfxFloat32x2 x, FfxFloat32x2 y, FfxFloat32x2 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x3 ffxMin3(FfxFloat32x3 x, FfxFloat32x3 y, FfxFloat32x3 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_F32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxFloat32x4 ffxMin3(FfxFloat32x4 x, FfxFloat32x4 y, FfxFloat32x4 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32 ffxMin3(FfxUInt32 x, FfxUInt32 y, FfxUInt32 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32x2 ffxMin3(FfxUInt32x2 x, FfxUInt32x2 y, FfxUInt32x2 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32x3 ffxMin3(FfxUInt32x3 x, FfxUInt32x3 y, FfxUInt32x3 z)
+{
+    return min(x, min(y, z));
+}
+
+/// Compute the minimum of three values.
+///
+/// NOTE: This function should compile down to a single <c><i>V_MIN3_U32</i></c> operation on GCN/RDNA hardware.
+///
+/// @param [in] x               The first value to include in the min calculation.
+/// @param [in] y               The second value to include in the min calculation.
+/// @param [in] z               The third value to include in the min calculation.
+///
+/// @returns
+/// The minimum value of <c><i>x</i></c>, <c><i>y</i></c>, and <c><i>z</i></c>.
+///
+/// @ingroup HLSL
+FfxUInt32x4 ffxMin3(FfxUInt32x4 x, FfxUInt32x4 y, FfxUInt32x4 z)
+{
+    return min(x, min(y, z));
+}
+
+// Shift right performed on signed casts: a and b are cast to FfxInt32, shifted, and the result is cast back to FfxUInt32.
+FfxUInt32 AShrSU1(FfxUInt32 a, FfxUInt32 b)
+{
+    return FfxUInt32(FfxInt32(a) >> FfxInt32(b));
+}
+
+//==============================================================================================================================
+//                                                          HLSL HALF
+//==============================================================================================================================
+#if FFX_HALF
+
+//==============================================================================================================================
+// Need to use manual unpack to get optimal execution (don't use packed types in buffers directly).
+// Unpack requires this pattern: https://gpuopen.com/first-steps-implementing-fp16/
+FFX_MIN16_F2 ffxUint32ToFloat16x2(FfxUInt32 x) // Low 16 bits -> .x, high 16 bits -> .y, each decoded with f16tof32.
+{
+	FfxFloat32x2 t = f16tof32(FfxUInt32x2(x & 0xFFFF, x >> 16));
+	return FFX_MIN16_F2(t);
+}
+FFX_MIN16_F4 ffxUint32x2ToFloat16x4(FfxUInt32x2 x) // x.x is unpacked first, followed by x.y (two values from each word).
+{
+	return FFX_MIN16_F4(ffxUint32ToFloat16x2(x.x), ffxUint32ToFloat16x2(x.y));
+}
+FFX_MIN16_U2 ffxUint32ToUint16x2(FfxUInt32 x) // Low 16 bits -> .x, high 16 bits -> .y.
+{
+	FfxUInt32x2 t = FfxUInt32x2(x & 0xFFFF, x >> 16);
+	return FFX_MIN16_U2(t);
+}
+FFX_MIN16_U4 ffxUint32x2ToUint16x4(FfxUInt32x2 x) // x.x is unpacked first, followed by x.y (two values from each word).
+{
+	return FFX_MIN16_U4(ffxUint32ToUint16x2(x.x), ffxUint32ToUint16x2(x.y));
+}
+#define FFX_UINT32_TO_FLOAT16X2(x) ffxUint32ToFloat16x2(FfxUInt32(x)) // Unpack wrappers: cast the argument to the 32-bit input type, then unpack.
+#define FFX_UINT32X2_TO_FLOAT16X4(x) ffxUint32x2ToFloat16x4(FfxUInt32x2(x))
+#define FFX_UINT32_TO_UINT16X2(x) ffxUint32ToUint16x2(FfxUInt32(x))
+#define FFX_UINT32X2_TO_UINT16X4(x) ffxUint32x2ToUint16x4(FfxUInt32x2(x))
+//------------------------------------------------------------------------------------------------------------------------------
+FfxUInt32 FFX_MIN16_F2ToUint32(FFX_MIN16_F2 x) // f32tof16(x.x) forms the low part, f32tof16(x.y) is shifted up by 16 bits.
+{
+	return f32tof16(x.x) + (f32tof16(x.y) << 16);
+}
+FfxUInt32x2 FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4 x) // x.xy packs into the first word, x.zw into the second.
+{
+	return FfxUInt32x2(FFX_MIN16_F2ToUint32(x.xy), FFX_MIN16_F2ToUint32(x.zw));
+}
+FfxUInt32 FFX_MIN16_U2ToUint32(FFX_MIN16_U2 x) // x.x forms the low part, x.y is shifted up by 16 bits.
+{
+	return FfxUInt32(x.x) + (FfxUInt32(x.y) << 16);
+}
+FfxUInt32x2 FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4 x) // x.xy packs into the first word, x.zw into the second.
+{
+	return FfxUInt32x2(FFX_MIN16_U2ToUint32(x.xy), FFX_MIN16_U2ToUint32(x.zw));
+}
+#define FFX_FLOAT16X2_TO_UINT32(x) FFX_MIN16_F2ToUint32(FFX_MIN16_F2(x)) // Pack wrappers: cast the argument to the 16-bit vector type, then pack.
+#define FFX_FLOAT16X4_TO_UINT32X2(x) FFX_MIN16_F4ToUint32x2(FFX_MIN16_F4(x))
+#define FFX_UINT16X2_TO_UINT32(x) FFX_MIN16_U2ToUint32(FFX_MIN16_U2(x))
+#define FFX_UINT16X4_TO_UINT32X2(x) FFX_MIN16_U4ToUint32x2(FFX_MIN16_U4(x))
+
+#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
+#define FFX_TO_UINT16(x) asuint16(x) // With FFX_HLSL_6_2 and 16-bit casts allowed, every width forwards to asuint16.
+#define FFX_TO_UINT16X2(x) asuint16(x)
+#define FFX_TO_UINT16X3(x) asuint16(x)
+#define FFX_TO_UINT16X4(x) asuint16(x)
+#else // Fallback: cast each component to FfxFloat32 and take the f32tof16 result.
+#define FFX_TO_UINT16(a) FFX_MIN16_U(f32tof16(FfxFloat32(a)))
+#define FFX_TO_UINT16X2(a) FFX_MIN16_U2(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y))
+#define FFX_TO_UINT16X3(a) FFX_MIN16_U3(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z))
+#define FFX_TO_UINT16X4(a) FFX_MIN16_U4(FFX_TO_UINT16((a).x), FFX_TO_UINT16((a).y), FFX_TO_UINT16((a).z), FFX_TO_UINT16((a).w))
+#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
+
+#if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
+#define FFX_TO_FLOAT16(x) asfloat16(x) // With FFX_HLSL_6_2 and 16-bit casts allowed, every width forwards to asfloat16.
+#define FFX_TO_FLOAT16X2(x) asfloat16(x)
+#define FFX_TO_FLOAT16X3(x) asfloat16(x)
+#define FFX_TO_FLOAT16X4(x) asfloat16(x)
+#else // Fallback: cast each component to FfxUInt32 and decode it with f16tof32.
+#define FFX_TO_FLOAT16(a) FFX_MIN16_F(f16tof32(FfxUInt32(a)))
+#define FFX_TO_FLOAT16X2(a) FFX_MIN16_F2(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y))
+#define FFX_TO_FLOAT16X3(a) FFX_MIN16_F3(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z))
+#define FFX_TO_FLOAT16X4(a) FFX_MIN16_F4(FFX_TO_FLOAT16((a).x), FFX_TO_FLOAT16((a).y), FFX_TO_FLOAT16((a).z), FFX_TO_FLOAT16((a).w))
+#endif // #if defined(FFX_HLSL_6_2) && !defined(FFX_NO_16_BIT_CAST)
+
+//==============================================================================================================================
+#define FFX_BROADCAST_FLOAT16(a)   FFX_MIN16_F(a) // NOTE(review): the X2/X3/X4 variants also expand to the scalar cast; presumably they rely on implicit scalar-to-vector promotion - confirm.
+#define FFX_BROADCAST_FLOAT16X2(a) FFX_MIN16_F(a)
+#define FFX_BROADCAST_FLOAT16X3(a) FFX_MIN16_F(a)
+#define FFX_BROADCAST_FLOAT16X4(a) FFX_MIN16_F(a)
+
+//------------------------------------------------------------------------------------------------------------------------------
+#define FFX_BROADCAST_INT16(a)   FFX_MIN16_I(a) // Same pattern as the float variants: every width expands to the scalar cast.
+#define FFX_BROADCAST_INT16X2(a) FFX_MIN16_I(a)
+#define FFX_BROADCAST_INT16X3(a) FFX_MIN16_I(a)
+#define FFX_BROADCAST_INT16X4(a) FFX_MIN16_I(a)
+
+//------------------------------------------------------------------------------------------------------------------------------
+#define FFX_BROADCAST_UINT16(a)   FFX_MIN16_U(a) // Same pattern as the float variants: every width expands to the scalar cast.
+#define FFX_BROADCAST_UINT16X2(a) FFX_MIN16_U(a)
+#define FFX_BROADCAST_UINT16X3(a) FFX_MIN16_U(a)
+#define FFX_BROADCAST_UINT16X4(a) FFX_MIN16_U(a)
+
+//==============================================================================================================================
+FFX_MIN16_U ffxAbsHalf(FFX_MIN16_U a) // All overloads: cast a to the signed type, apply abs, cast the result back to unsigned.
+{
+	return FFX_MIN16_U(abs(FFX_MIN16_I(a)));
+}
+FFX_MIN16_U2 ffxAbsHalf(FFX_MIN16_U2 a)
+{
+	return FFX_MIN16_U2(abs(FFX_MIN16_I2(a)));
+}
+FFX_MIN16_U3 ffxAbsHalf(FFX_MIN16_U3 a)
+{
+	return FFX_MIN16_U3(abs(FFX_MIN16_I3(a)));
+}
+FFX_MIN16_U4 ffxAbsHalf(FFX_MIN16_U4 a)
+{
+	return FFX_MIN16_U4(abs(FFX_MIN16_I4(a)));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxClampHalf(FFX_MIN16_F x, FFX_MIN16_F n, FFX_MIN16_F m) // All overloads: limit x to lower bound n and upper bound m via max(n, min(x, m)).
+{
+	return max(n, min(x, m));
+}
+FFX_MIN16_F2 ffxClampHalf(FFX_MIN16_F2 x, FFX_MIN16_F2 n, FFX_MIN16_F2 m)
+{
+	return max(n, min(x, m));
+}
+FFX_MIN16_F3 ffxClampHalf(FFX_MIN16_F3 x, FFX_MIN16_F3 n, FFX_MIN16_F3 m)
+{
+	return max(n, min(x, m));
+}
+FFX_MIN16_F4 ffxClampHalf(FFX_MIN16_F4 x, FFX_MIN16_F4 n, FFX_MIN16_F4 m)
+{
+	return max(n, min(x, m));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+// Fractional part computed as x - floor(x); intended to map to V_FRACT_F16 (note DX frac() is different).
+FFX_MIN16_F ffxFract(FFX_MIN16_F x)
+{
+	return x - floor(x);
+}
+FFX_MIN16_F2 ffxFract(FFX_MIN16_F2 x)
+{
+	return x - floor(x);
+}
+FFX_MIN16_F3 ffxFract(FFX_MIN16_F3 x)
+{
+	return x - floor(x);
+}
+FFX_MIN16_F4 ffxFract(FFX_MIN16_F4 x)
+{
+	return x - floor(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxLerp(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F a) // All overloads forward to the HLSL lerp intrinsic; a is the weight (scalar or same width as x and y).
+{
+	return lerp(x, y, a);
+}
+FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F a)
+{
+	return lerp(x, y, a);
+}
+FFX_MIN16_F2 ffxLerp(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 a)
+{
+	return lerp(x, y, a);
+}
+FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F a)
+{
+	return lerp(x, y, a);
+}
+FFX_MIN16_F3 ffxLerp(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 a)
+{
+	return lerp(x, y, a);
+}
+FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F a)
+{
+	return lerp(x, y, a);
+}
+FFX_MIN16_F4 ffxLerp(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 a)
+{
+	return lerp(x, y, a);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxMax3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) // All overloads: maximum of three values via max(x, max(y, z)).
+{
+	return max(x, max(y, z));
+}
+FFX_MIN16_F2 ffxMax3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
+{
+	return max(x, max(y, z));
+}
+FFX_MIN16_F3 ffxMax3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
+{
+	return max(x, max(y, z));
+}
+FFX_MIN16_F4 ffxMax3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
+{
+	return max(x, max(y, z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxMin3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) // All overloads: minimum of three values via min(x, min(y, z)).
+{
+	return min(x, min(y, z));
+}
+FFX_MIN16_F2 ffxMin3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
+{
+	return min(x, min(y, z));
+}
+FFX_MIN16_F3 ffxMin3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
+{
+	return min(x, min(y, z));
+}
+FFX_MIN16_F4 ffxMin3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
+{
+	return min(x, min(y, z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxMed3Half(FFX_MIN16_F x, FFX_MIN16_F y, FFX_MIN16_F z) // All float overloads: median of three values via max(min(x, y), min(max(x, y), z)).
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FFX_MIN16_F2 ffxMed3Half(FFX_MIN16_F2 x, FFX_MIN16_F2 y, FFX_MIN16_F2 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FFX_MIN16_F3 ffxMed3Half(FFX_MIN16_F3 x, FFX_MIN16_F3 y, FFX_MIN16_F3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FFX_MIN16_F4 ffxMed3Half(FFX_MIN16_F4 x, FFX_MIN16_F4 y, FFX_MIN16_F4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_I ffxMed3Half(FFX_MIN16_I x, FFX_MIN16_I y, FFX_MIN16_I z) // All integer overloads: median of three values via max(min(x, y), min(max(x, y), z)).
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FFX_MIN16_I2 ffxMed3Half(FFX_MIN16_I2 x, FFX_MIN16_I2 y, FFX_MIN16_I2 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FFX_MIN16_I3 ffxMed3Half(FFX_MIN16_I3 x, FFX_MIN16_I3 y, FFX_MIN16_I3 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+FFX_MIN16_I4 ffxMed3Half(FFX_MIN16_I4 x, FFX_MIN16_I4 y, FFX_MIN16_I4 z)
+{
+    return max(min(x, y), min(max(x, y), z));
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxReciprocalHalf(FFX_MIN16_F x) // All overloads forward to the HLSL rcp intrinsic.
+{
+	return rcp(x);
+}
+FFX_MIN16_F2 ffxReciprocalHalf(FFX_MIN16_F2 x)
+{
+	return rcp(x);
+}
+FFX_MIN16_F3 ffxReciprocalHalf(FFX_MIN16_F3 x)
+{
+	return rcp(x);
+}
+FFX_MIN16_F4 ffxReciprocalHalf(FFX_MIN16_F4 x)
+{
+	return rcp(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxReciprocalSquareRootHalf(FFX_MIN16_F x) // All overloads forward to the HLSL rsqrt intrinsic.
+{
+	return rsqrt(x);
+}
+FFX_MIN16_F2 ffxReciprocalSquareRootHalf(FFX_MIN16_F2 x)
+{
+	return rsqrt(x);
+}
+FFX_MIN16_F3 ffxReciprocalSquareRootHalf(FFX_MIN16_F3 x)
+{
+	return rsqrt(x);
+}
+FFX_MIN16_F4 ffxReciprocalSquareRootHalf(FFX_MIN16_F4 x)
+{
+	return rsqrt(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_F ffxSaturate(FFX_MIN16_F x) // All overloads forward to the HLSL saturate intrinsic (clamp to [0..1]).
+{
+	return saturate(x);
+}
+FFX_MIN16_F2 ffxSaturate(FFX_MIN16_F2 x)
+{
+	return saturate(x);
+}
+FFX_MIN16_F3 ffxSaturate(FFX_MIN16_F3 x)
+{
+	return saturate(x);
+}
+FFX_MIN16_F4 ffxSaturate(FFX_MIN16_F4 x)
+{
+	return saturate(x);
+}
+//------------------------------------------------------------------------------------------------------------------------------
+FFX_MIN16_U ffxBitShiftRightHalf(FFX_MIN16_U a, FFX_MIN16_U b) // All overloads: shift right on signed casts of a and b, then cast the result back to unsigned.
+{
+	return FFX_MIN16_U(FFX_MIN16_I(a) >> FFX_MIN16_I(b));
+}
+FFX_MIN16_U2 ffxBitShiftRightHalf(FFX_MIN16_U2 a, FFX_MIN16_U2 b)
+{
+	return FFX_MIN16_U2(FFX_MIN16_I2(a) >> FFX_MIN16_I2(b));
+}
+FFX_MIN16_U3 ffxBitShiftRightHalf(FFX_MIN16_U3 a, FFX_MIN16_U3 b)
+{
+	return FFX_MIN16_U3(FFX_MIN16_I3(a) >> FFX_MIN16_I3(b));
+}
+FFX_MIN16_U4 ffxBitShiftRightHalf(FFX_MIN16_U4 a, FFX_MIN16_U4 b)
+{
+	return FFX_MIN16_U4(FFX_MIN16_I4(a) >> FFX_MIN16_I4(b));
+}
+#endif // FFX_HALF
+
+//==============================================================================================================================
+//                                                         HLSL WAVE
+//==============================================================================================================================
+#if defined(FFX_WAVE)
+// AWaveXorF1..F4: return v as read (WaveReadLaneAt) from the lane whose index is WaveGetLaneIndex() ^ x, where 'x' must be a compile time literal.
+FfxFloat32 AWaveXorF1(FfxFloat32 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxFloat32x2 AWaveXorF2(FfxFloat32x2 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxFloat32x3 AWaveXorF3(FfxFloat32x3 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxFloat32x4 AWaveXorF4(FfxFloat32x4 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxUInt32 AWaveXorU1(FfxUInt32 v, FfxUInt32 x) // Unsigned variants: return v as read from lane (WaveGetLaneIndex() ^ x).
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxUInt32x2 AWaveXorU1(FfxUInt32x2 v, FfxUInt32 x) // NOTE(review): the vector overloads reuse the name AWaveXorU1 (the float variants are F2/F3/F4) - confirm this is intended.
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxUInt32x3 AWaveXorU1(FfxUInt32x3 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+FfxUInt32x4 AWaveXorU1(FfxUInt32x4 v, FfxUInt32 x)
+{
+    return WaveReadLaneAt(v, WaveGetLaneIndex() ^ x);
+}
+
+#if FFX_HALF
+FfxFloat16x2 ffxWaveXorFloat16x2(FfxFloat16x2 v, FfxUInt32 x) // Packs v into a 32-bit word, reads it from lane (WaveGetLaneIndex() ^ x), then unpacks.
+{
+    return FFX_UINT32_TO_FLOAT16X2(WaveReadLaneAt(FFX_FLOAT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x));
+}
+FfxFloat16x4 ffxWaveXorFloat16x4(FfxFloat16x4 v, FfxUInt32 x) // Packs v into two 32-bit words, reads them from lane (WaveGetLaneIndex() ^ x), then unpacks.
+{
+    return FFX_UINT32X2_TO_FLOAT16X4(WaveReadLaneAt(FFX_FLOAT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x));
+}
+FfxUInt16x2 ffxWaveXorUint16x2(FfxUInt16x2 v, FfxUInt32 x) // Packs v into a 32-bit word, reads it from lane (WaveGetLaneIndex() ^ x), then unpacks.
+{
+    return FFX_UINT32_TO_UINT16X2(WaveReadLaneAt(FFX_UINT16X2_TO_UINT32(v), WaveGetLaneIndex() ^ x));
+}
+FfxUInt16x4 ffxWaveXorUint16x4(FfxUInt16x4 v, FfxUInt32 x) // Packs v into two 32-bit words, reads them from lane (WaveGetLaneIndex() ^ x), then unpacks.
+{
+    return FFX_UINT32X2_TO_UINT16X4(WaveReadLaneAt(FFX_UINT16X4_TO_UINT32X2(v), WaveGetLaneIndex() ^ x));
+}
+#endif // FFX_HALF
+#endif // #if defined(FFX_WAVE)
diff --git a/thirdparty/amd-fsr2/shaders/ffx_core_portability.h b/thirdparty/amd-fsr2/shaders/ffx_core_portability.h
new file mode 100644
index 0000000000..45be05973a
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_core_portability.h
@@ -0,0 +1,50 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+FfxFloat32x3 opAAddOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+{
+    // Adds scalar 'b' to every component of 'a'. 'd' is received by value, so only the return value carries the result.
+    return a + ffxBroadcast3(b);
+}
+
+FfxFloat32x3 opACpyF3(FfxFloat32x3 d, FfxFloat32x3 a)
+{
+    // Copy: hands 'a' back unchanged. 'd' is received by value, so only the return value carries the result.
+    return a;
+}
+
+FfxFloat32x3 opAMulF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32x3 b)
+{
+    // Component-wise product of 'a' and 'b'. 'd' is received by value, so only the return value carries the result.
+    return a * b;
+}
+
+FfxFloat32x3 opAMulOneF3(FfxFloat32x3 d, FfxFloat32x3 a, FfxFloat32 b)
+{
+    // Scales every component of 'a' by scalar 'b'. 'd' is received by value, so only the return value carries the result.
+    return a * ffxBroadcast3(b);
+}
+
+FfxFloat32x3 opARcpF3(FfxFloat32x3 d, FfxFloat32x3 a)
+{
+    // Component-wise rcp() of 'a'. 'd' is received by value, so only the return value carries the result.
+    return rcp(a);
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr1.h b/thirdparty/amd-fsr2/shaders/ffx_fsr1.h
new file mode 100644
index 0000000000..1ac23cf3de
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr1.h
@@ -0,0 +1,1250 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifdef __clang__
+#pragma clang diagnostic ignored "-Wunused-variable"
+#endif
+
+/// Setup required constant values for EASU (works on CPU or GPU). Every value is a float stored through ffxAsUInt32.
+///
+/// @param [out] con0                                   xy: input viewport / output size per axis; zw: 0.5 * that ratio - 0.5 (maps an output pixel index to a viewport position).
+/// @param [out] con1                                   xy: 1 / input size per axis; zw: (+1 / input width, -1 / input height), the offset from 'F' to gather (0).
+/// @param [out] con2                                   xy: (-1 / input width, +2 / input height); zw: (+1 / input width, +2 / input height); both relative to gather (0).
+/// @param [out] con3                                   xy: (0, +4 / input height), relative to gather (0); zw: written as 0.
+/// @param [in] inputViewportInPixelsX                  The rendered image resolution being upscaled in X dimension.
+/// @param [in] inputViewportInPixelsY                  The rendered image resolution being upscaled in Y dimension.
+/// @param [in] inputSizeInPixelsX                      The resolution of the resource containing the input image (useful for dynamic resolution) in X dimension.
+/// @param [in] inputSizeInPixelsY                      The resolution of the resource containing the input image (useful for dynamic resolution) in Y dimension.
+/// @param [in] outputSizeInPixelsX                     The display resolution which the input image gets upscaled to in X dimension.
+/// @param [in] outputSizeInPixelsY                     The display resolution which the input image gets upscaled to in Y dimension.
+/// 
+/// @ingroup FSR1
+FFX_STATIC void ffxFsrPopulateEasuConstants(
+    FFX_PARAMETER_INOUT FfxUInt32x4 con0,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con1,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con2,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con3,
+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY)
+{
+    // Output integer position to a pixel position in viewport.
+    con0[0] = ffxAsUInt32(inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX));
+    con0[1] = ffxAsUInt32(inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY));
+    con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5));
+    con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5));
+
+    // Viewport pixel position to normalized image space.
+    // This is used to get upper-left of 'F' tap.
+    con1[0] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsX));
+    con1[1] = ffxAsUInt32(ffxReciprocal(inputSizeInPixelsY));
+
+    // Centers of gather4, first offset from upper-left of 'F'.
+    //      +---+---+
+    //      |   |   |
+    //      +--(0)--+
+    //      | b | c |
+    //  +---F---+---+---+
+    //  | e | f | g | h |
+    //  +--(1)--+--(2)--+
+    //  | i | j | k | l |
+    //  +---+---+---+---+
+    //      | n | o |
+    //      +--(3)--+
+    //      |   |   |
+    //      +---+---+
+    con1[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX));
+    con1[3] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsY));
+
+    // These are from (0) instead of 'F'.
+    con2[0] = ffxAsUInt32(FfxFloat32(-1.0) * ffxReciprocal(inputSizeInPixelsX));  // (0) -> (1), x
+    con2[1] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY));   // (0) -> (1), y
+    con2[2] = ffxAsUInt32(FfxFloat32(1.0) * ffxReciprocal(inputSizeInPixelsX));   // (0) -> (2), x
+    con2[3] = ffxAsUInt32(FfxFloat32(2.0) * ffxReciprocal(inputSizeInPixelsY));   // (0) -> (2), y
+    con3[0] = ffxAsUInt32(FfxFloat32(0.0) * ffxReciprocal(inputSizeInPixelsX));   // (0) -> (3), x
+    con3[1] = ffxAsUInt32(FfxFloat32(4.0) * ffxReciprocal(inputSizeInPixelsY));   // (0) -> (3), y
+    con3[2] = con3[3] = 0;  // The EASU kernels in this file only read con3.xy.
+}
+
+/// Setup required constant values for EASU when the input image sits at an offset inside its resource (works on CPU or GPU).
+///
+/// @param [out] con0                               As written by ffxFsrPopulateEasuConstants, except zw additionally includes the input offset.
+/// @param [out] con1                               As written by ffxFsrPopulateEasuConstants.
+/// @param [out] con2                               As written by ffxFsrPopulateEasuConstants.
+/// @param [out] con3                               As written by ffxFsrPopulateEasuConstants.
+/// @param [in] inputViewportInPixelsX              The resolution of the input in the X dimension.
+/// @param [in] inputViewportInPixelsY              The resolution of the input in the Y dimension.
+/// @param [in] inputSizeInPixelsX                  The input size in pixels in the X dimension.
+/// @param [in] inputSizeInPixelsY                  The input size in pixels in the Y dimension.
+/// @param [in] outputSizeInPixelsX                 The output size in pixels in the X dimension.
+/// @param [in] outputSizeInPixelsY                 The output size in pixels in the Y dimension.
+/// @param [in] inputOffsetInPixelsX                The input image offset in the X dimension into the resource containing it (useful for dynamic resolution).
+/// @param [in] inputOffsetInPixelsY                The input image offset in the Y dimension into the resource containing it (useful for dynamic resolution).
+///
+/// @ingroup FSR1
+FFX_STATIC void ffxFsrPopulateEasuConstantsOffset(
+    FFX_PARAMETER_INOUT FfxUInt32x4 con0,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con1,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con2,
+    FFX_PARAMETER_INOUT FfxUInt32x4 con3,
+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 inputViewportInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 inputSizeInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 outputSizeInPixelsY,
+    FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsX,
+    FFX_PARAMETER_IN FfxFloat32 inputOffsetInPixelsY)
+{
+    ffxFsrPopulateEasuConstants(  // Fill all four constants for the zero-offset case first.
+        con0,
+        con1,
+        con2,
+        con3,
+        inputViewportInPixelsX,
+        inputViewportInPixelsY,
+        inputSizeInPixelsX,
+        inputSizeInPixelsY,
+        outputSizeInPixelsX,
+        outputSizeInPixelsY);
+
+    // Override con0.zw: same bias as the zero-offset case, plus the input offset in pixels.
+    con0[2] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsX * ffxReciprocal(outputSizeInPixelsX) - FfxFloat32(0.5) + inputOffsetInPixelsX);
+    con0[3] = ffxAsUInt32(FfxFloat32(0.5) * inputViewportInPixelsY * ffxReciprocal(outputSizeInPixelsY) - FfxFloat32(0.5) + inputOffsetInPixelsY);
+}
+
+#if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT)
+// Input callback prototypes, need to be implemented by calling shader
+FfxFloat32x4 FsrEasuRF(FfxFloat32x2 p);
+FfxFloat32x4 FsrEasuGF(FfxFloat32x2 p);
+FfxFloat32x4 FsrEasuBF(FfxFloat32x2 p);
+
+// Filtering for a given tap for the scalar (FP32) path: weights 'color' by the windowed kernel below and adds it to the running sums.
+void fsrEasuTapFloat(
+    FFX_PARAMETER_INOUT FfxFloat32x3 accumulatedColor,   // Accumulated color, with negative lobe.
+    FFX_PARAMETER_INOUT FfxFloat32 accumulatedWeight,    // Accumulated weight.
+    FFX_PARAMETER_IN FfxFloat32x2 pixelOffset,           // Pixel offset from resolve position to tap.
+    FFX_PARAMETER_IN FfxFloat32x2 gradientDirection,     // Gradient direction, used as the (cos, sin) pair of a 2D rotation.
+    FFX_PARAMETER_IN FfxFloat32x2 length,                // Per-axis scale applied to the rotated offset (anisotropy).
+    FFX_PARAMETER_IN FfxFloat32 negativeLobeStrength,    // Negative lobe strength (coefficient of x^2 in the window term).
+    FFX_PARAMETER_IN FfxFloat32 clippingPoint,           // Clipping point (upper clamp applied to distance^2).
+    FFX_PARAMETER_IN FfxFloat32x3 color)                 // Tap color.
+{
+    // Rotate offset by direction.
+    FfxFloat32x2 rotatedOffset;
+    rotatedOffset.x = (pixelOffset.x * (gradientDirection.x)) + (pixelOffset.y * gradientDirection.y);
+    rotatedOffset.y = (pixelOffset.x * (-gradientDirection.y)) + (pixelOffset.y * gradientDirection.x);
+
+    // Anisotropy.
+    rotatedOffset *= length;
+
+    // Compute distance^2.
+    FfxFloat32 distanceSquared = rotatedOffset.x * rotatedOffset.x + rotatedOffset.y * rotatedOffset.y;
+
+    // Limit to the window as at corner, 2 taps can easily be outside.
+    distanceSquared = ffxMin(distanceSquared, clippingPoint);
+
+    // Approximation of Lanczos-2 without sin() or rcp(), or sqrt() to get x.
+    //  (25/16 * (2/5 * x^2 - 1)^2 - (25/16 - 1)) * (1/4 * x^2 - 1)^2
+    //  |_______________________________________|   |_______________|
+    //                   base                             window
+    // The general form of the 'base' is,
+    //  (a*(b*x^2-1)^2-(a-1))
+    // Where 'a=1/(2*b-b^2)' and 'b' moves around the negative lobe.
+    FfxFloat32 weightB = FfxFloat32(2.0 / 5.0) * distanceSquared + FfxFloat32(-1.0);  // 'base' inner term, b = 2/5.
+    FfxFloat32 weightA = negativeLobeStrength * distanceSquared + FfxFloat32(-1.0);   // 'window' inner term; the caller-supplied strength stands in for the 1/4 above.
+    weightB *= weightB;
+    weightA *= weightA;
+    weightB = FfxFloat32(25.0 / 16.0) * weightB + FfxFloat32(-(25.0 / 16.0 - 1.0));  // Finish 'base' with a = 25/16.
+    FfxFloat32 weight = weightB * weightA;
+
+    // Do weighted average.
+    accumulatedColor += color * weight;
+    accumulatedWeight += weight;
+}
+
+// Accumulate direction and length for one of the four bilinear corners, from the '+' shaped luma neighbourhood around it.
+void fsrEasuSetFloat(
+    FFX_PARAMETER_INOUT FfxFloat32x2 direction,  // Running weighted sum of the (x, y) luma differences.
+    FFX_PARAMETER_INOUT FfxFloat32 length,       // Running weighted sum of the shaped x and y edge terms.
+    FFX_PARAMETER_IN FfxFloat32x2 pp,            // Fractional position used for the bilinear weights.
+    FFX_PARAMETER_IN FfxBoolean biS,             // Use the 's' corner weight (1-x)*(1-y).
+    FFX_PARAMETER_IN FfxBoolean biT,             // Use the 't' corner weight x*(1-y).
+    FFX_PARAMETER_IN FfxBoolean biU,             // Use the 'u' corner weight (1-x)*y.
+    FFX_PARAMETER_IN FfxBoolean biV,             // Use the 'v' corner weight x*y.
+    FFX_PARAMETER_IN FfxFloat32 lA,              // Luma 'a' of the '+' pattern drawn below.
+    FFX_PARAMETER_IN FfxFloat32 lB,              // Luma 'b'.
+    FFX_PARAMETER_IN FfxFloat32 lC,              // Luma 'c' (centre).
+    FFX_PARAMETER_IN FfxFloat32 lD,              // Luma 'd'.
+    FFX_PARAMETER_IN FfxFloat32 lE)              // Luma 'e'.
+{
+    // Compute bilinear weight, branches factor out as predicates are compile-time immediates.
+    //  s t
+    //  u v
+    FfxFloat32 weight = FfxFloat32(0.0);
+    if (biS)
+        weight = (FfxFloat32(1.0) - pp.x) * (FfxFloat32(1.0) - pp.y);
+    if (biT)
+        weight = pp.x * (FfxFloat32(1.0) - pp.y);
+    if (biU)
+        weight = (FfxFloat32(1.0) - pp.x) * pp.y;
+    if (biV)
+        weight = pp.x * pp.y;  // The checks are not exclusive: if several flags are set, the last one wins.
+
+    // Direction is the '+' diff.
+    //    a
+    //  b c d
+    //    e
+    // Then takes magnitude from abs average of both sides of 'c'.
+    // Length converts gradient reversal to 0, smoothly to non-reversal at 1, shaped, then adding horz and vert terms.
+    FfxFloat32 dc = lD - lC;
+    FfxFloat32 cb = lC - lB;
+    FfxFloat32 lengthX = max(abs(dc), abs(cb));
+    lengthX = ffxApproximateReciprocal(lengthX);
+    FfxFloat32 directionX = lD - lB;
+    direction.x += directionX * weight;
+    lengthX = ffxSaturate(abs(directionX) * lengthX);  // |d - b| divided by the larger one-sided difference, clamped by ffxSaturate.
+    lengthX *= lengthX;
+    length += lengthX * weight;
+
+    // Repeat for the y axis.
+    FfxFloat32 ec = lE - lC;
+    FfxFloat32 ca = lC - lA;
+    FfxFloat32 lengthY = max(abs(ec), abs(ca));
+    lengthY = ffxApproximateReciprocal(lengthY);
+    FfxFloat32 directionY = lE - lA;
+    direction.y += directionY * weight;
+    lengthY = ffxSaturate(abs(directionY) * lengthY);
+    lengthY *= lengthY;
+    length += lengthY * weight;
+}
+
+/// Apply edge-aware spatial upsampling using 32bit floating point precision calculations.
+///
+/// @param [out] pix                    The computed color of a pixel.
+/// @param [in]  ip                     Integer pixel position within the output.
+/// @param [in]  con0                   The first constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
+/// @param [in]  con1                   The second constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
+/// @param [in]  con2                   The third constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
+/// @param [in]  con3                   The fourth constant value generated by <c><i>ffxFsrPopulateEasuConstants</i></c>.
+/// 
+/// @ingroup FSR1
+void ffxFsrEasuFloat(
+    FFX_PARAMETER_OUT FfxFloat32x3 pix,
+    FFX_PARAMETER_IN FfxUInt32x2 ip,
+    FFX_PARAMETER_IN FfxUInt32x4 con0,
+    FFX_PARAMETER_IN FfxUInt32x4 con1,
+    FFX_PARAMETER_IN FfxUInt32x4 con2,
+    FFX_PARAMETER_IN FfxUInt32x4 con3)
+{
+    // Get position of 'f'.
+    FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw);
+    FfxFloat32x2 fp = floor(pp);
+    pp -= fp;  // 'pp' now holds only the fractional part; 'fp' the integer tap position.
+
+    // 12-tap kernel.
+    //    b c
+    //  e f g h
+    //  i j k l
+    //    n o
+    // Gather 4 ordering.
+    //  a b
+    //  r g
+    // For packed FP16, need either {rg} or {ab} so using the following setup for gather in all versions,
+    //    a b    <- unused (z)
+    //    r g
+    //  a b a b
+    //  r g r g
+    //    a b
+    //    r g    <- unused (z)
+    // Allowing dead-code removal to remove the 'z's.
+    FfxFloat32x2 p0 = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw);
+
+    // These are from p0 to avoid pulling two constants on pre-Navi hardware.
+    FfxFloat32x2 p1    = p0 + ffxAsFloat(con2.xy);
+    FfxFloat32x2 p2    = p0 + ffxAsFloat(con2.zw);
+    FfxFloat32x2 p3    = p0 + ffxAsFloat(con3.xy);
+    FfxFloat32x4 bczzR = FsrEasuRF(p0);
+    FfxFloat32x4 bczzG = FsrEasuGF(p0);
+    FfxFloat32x4 bczzB = FsrEasuBF(p0);
+    FfxFloat32x4 ijfeR = FsrEasuRF(p1);
+    FfxFloat32x4 ijfeG = FsrEasuGF(p1);
+    FfxFloat32x4 ijfeB = FsrEasuBF(p1);
+    FfxFloat32x4 klhgR = FsrEasuRF(p2);
+    FfxFloat32x4 klhgG = FsrEasuGF(p2);
+    FfxFloat32x4 klhgB = FsrEasuBF(p2);
+    FfxFloat32x4 zzonR = FsrEasuRF(p3);
+    FfxFloat32x4 zzonG = FsrEasuGF(p3);
+    FfxFloat32x4 zzonB = FsrEasuBF(p3);
+
+    // Simplest multi-channel approximate luma possible (luma times 2, in 2 FMA/MAD).
+    FfxFloat32x4 bczzL = bczzB * ffxBroadcast4(0.5) + (bczzR * ffxBroadcast4(0.5) + bczzG);
+    FfxFloat32x4 ijfeL = ijfeB * ffxBroadcast4(0.5) + (ijfeR * ffxBroadcast4(0.5) + ijfeG);
+    FfxFloat32x4 klhgL = klhgB * ffxBroadcast4(0.5) + (klhgR * ffxBroadcast4(0.5) + klhgG);
+    FfxFloat32x4 zzonL = zzonB * ffxBroadcast4(0.5) + (zzonR * ffxBroadcast4(0.5) + zzonG);
+
+    // Rename.
+    FfxFloat32 bL = bczzL.x;
+    FfxFloat32 cL = bczzL.y;
+    FfxFloat32 iL = ijfeL.x;
+    FfxFloat32 jL = ijfeL.y;
+    FfxFloat32 fL = ijfeL.z;
+    FfxFloat32 eL = ijfeL.w;
+    FfxFloat32 kL = klhgL.x;
+    FfxFloat32 lL = klhgL.y;
+    FfxFloat32 hL = klhgL.z;
+    FfxFloat32 gL = klhgL.w;
+    FfxFloat32 oL = zzonL.z;
+    FfxFloat32 nL = zzonL.w;
+
+    // Accumulate for bilinear interpolation.
+    FfxFloat32x2 dir = ffxBroadcast2(0.0);
+    FfxFloat32  len = FfxFloat32(0.0);
+    fsrEasuSetFloat(dir, len, pp, FFX_TRUE,  FFX_FALSE, FFX_FALSE, FFX_FALSE, bL, eL, fL, gL, jL);
+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_TRUE,  FFX_FALSE, FFX_FALSE, cL, fL, gL, hL, kL);
+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_TRUE,  FFX_FALSE, fL, iL, jL, kL, nL);
+    fsrEasuSetFloat(dir, len, pp, FFX_FALSE, FFX_FALSE, FFX_FALSE, FFX_TRUE,  gL, jL, kL, lL, oL);
+
+    // Normalize with approximation, and cleanup close to zero.
+    FfxFloat32x2 dir2 = dir * dir;
+    FfxFloat32 dirR = dir2.x + dir2.y;
+    FfxBoolean zro = dirR < FfxFloat32(1.0 / 32768.0);  // Boolean flag (as in the FP16 path); an unsigned integer is not a valid ternary condition in GLSL.
+    dirR = ffxApproximateReciprocalSquareRoot(dirR);
+    dirR = zro ? FfxFloat32(1.0) : dirR;
+    dir.x = zro ? FfxFloat32(1.0) : dir.x;
+    dir *= ffxBroadcast2(dirR);
+
+    // Transform from {0 to 2} to {0 to 1} range, and shape with square.
+    len = len * FfxFloat32(0.5);
+    len *= len;
+
+    // Stretch kernel {1.0 vert|horz, to sqrt(2.0) on diagonal}.
+    FfxFloat32 stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocal(max(abs(dir.x), abs(dir.y)));
+
+    // Anisotropic length after rotation,
+    //  x := 1.0 lerp to 'stretch' on edges
+    //  y := 1.0 lerp to 2x on edges
+    FfxFloat32x2 len2 = FfxFloat32x2(FfxFloat32(1.0) + (stretch - FfxFloat32(1.0)) * len, FfxFloat32(1.0) + FfxFloat32(-0.5) * len);
+
+    // Based on the amount of 'edge',
+    // the window shifts from +/-{sqrt(2.0) to slightly beyond 2.0}.
+    FfxFloat32 lob = FfxFloat32(0.5) + FfxFloat32((1.0 / 4.0 - 0.04) - 0.5) * len;
+
+    // Set distance^2 clipping point to the end of the adjustable window.
+    FfxFloat32 clp = ffxApproximateReciprocal(lob);
+
+    // Accumulation mixed with min/max of 4 nearest.
+    //    b c
+    //  e f g h
+    //  i j k l
+    //    n o
+    FfxFloat32x3 min4 =
+        ffxMin(ffxMin3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)),
+               FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));
+    FfxFloat32x3 max4 =
+        max(ffxMax3(FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z), FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w), FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y)), FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));
+
+    // Accumulation.
+    FfxFloat32x3 aC = ffxBroadcast3(0.0);
+    FfxFloat32  aW = FfxFloat32(0.0);
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.x, bczzG.x, bczzB.x));  // b
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, -1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(bczzR.y, bczzG.y, bczzB.y));  // c
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.x, ijfeG.x, ijfeB.x));  // i
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.y, ijfeG.y, ijfeB.y));   // j
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.z, ijfeG.z, ijfeB.z));   // f
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(-1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(ijfeR.w, ijfeG.w, ijfeB.w));  // e
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.x, klhgG.x, klhgB.x));   // k
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 1.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.y, klhgG.y, klhgB.y));   // l
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(2.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.z, klhgG.z, klhgB.z));   // h
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 0.0) - pp, dir, len2, lob, clp, FfxFloat32x3(klhgR.w, klhgG.w, klhgB.w));   // g
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(1.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.z, zzonG.z, zzonB.z));   // o
+    fsrEasuTapFloat(aC, aW, FfxFloat32x2(0.0, 2.0) - pp, dir, len2, lob, clp, FfxFloat32x3(zzonR.w, zzonG.w, zzonB.w));   // n
+
+    // Normalize and dering.
+    pix = ffxMin(max4, max(min4, aC * ffxBroadcast3(rcp(aW))));  // Weighted average, clamped to the min/max of the 4 nearest taps (f, g, j, k).
+}
+#endif // #if defined(FFX_GPU) && defined(FFX_FSR_EASU_FLOAT)
+
+#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FFX_FSR_EASU_HALF)
+// Input callback prototypes, need to be implemented by calling shader
+FfxFloat16x4 FsrEasuRH(FfxFloat32x2 p);
+FfxFloat16x4 FsrEasuGH(FfxFloat32x2 p);
+FfxFloat16x4 FsrEasuBH(FfxFloat32x2 p);
+
+// This runs 2 taps in parallel: each FfxFloat16x2 argument carries one value per tap, and the same kernel is applied component-wise.
+void FsrEasuTapH(
+    FFX_PARAMETER_INOUT FfxFloat16x2 aCR,   // Accumulated red, one partial sum per tap slot.
+    FFX_PARAMETER_INOUT FfxFloat16x2 aCG,   // Accumulated green.
+    FFX_PARAMETER_INOUT FfxFloat16x2 aCB,   // Accumulated blue.
+    FFX_PARAMETER_INOUT FfxFloat16x2 aW,    // Accumulated weight.
+    FFX_PARAMETER_IN FfxFloat16x2 offX,     // X offsets of the two taps.
+    FFX_PARAMETER_IN FfxFloat16x2 offY,     // Y offsets of the two taps.
+    FFX_PARAMETER_IN FfxFloat16x2 dir,      // Direction, shared by both taps (used as a 2D rotation).
+    FFX_PARAMETER_IN FfxFloat16x2 len,      // Per-axis scale applied after rotation.
+    FFX_PARAMETER_IN FfxFloat16 lob,        // Coefficient of d2 in the window term 'wA'.
+    FFX_PARAMETER_IN FfxFloat16 clp,        // Upper clamp applied to distance^2.
+    FFX_PARAMETER_IN FfxFloat16x2 cR,       // Red of the two taps.
+    FFX_PARAMETER_IN FfxFloat16x2 cG,       // Green of the two taps.
+    FFX_PARAMETER_IN FfxFloat16x2 cB)       // Blue of the two taps.
+{
+    FfxFloat16x2 vX, vY;
+    vX = offX * dir.xx + offY * dir.yy;  // Rotate both offsets by 'dir'.
+    vY = offX * (-dir.yy) + offY * dir.xx;
+    vX *= len.x;
+    vY *= len.y;
+    FfxFloat16x2 d2 = vX * vX + vY * vY;
+    d2              = min(d2, FFX_BROADCAST_FLOAT16X2(clp));
+    FfxFloat16x2 wB = FFX_BROADCAST_FLOAT16X2(2.0 / 5.0) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0);
+    FfxFloat16x2 wA = FFX_BROADCAST_FLOAT16X2(lob) * d2 + FFX_BROADCAST_FLOAT16X2(-1.0);
+    wB *= wB;
+    wA *= wA;
+    wB             = FFX_BROADCAST_FLOAT16X2(25.0 / 16.0) * wB + FFX_BROADCAST_FLOAT16X2(-(25.0 / 16.0 - 1.0));
+    FfxFloat16x2 w = wB * wA;  // Same base * window weight as fsrEasuTapFloat, evaluated for two taps at once.
+    aCR += cR * w;
+    aCG += cG * w;
+    aCB += cB * w;
+    aW += w;
+}
+
+// This runs 2 taps in parallel: accumulates direction and length for one row of the bilinear footprint, one corner per component.
+void FsrEasuSetH(
+    FFX_PARAMETER_INOUT FfxFloat16x2 dirPX,  // Running weighted x-direction, one partial sum per corner.
+    FFX_PARAMETER_INOUT FfxFloat16x2  dirPY, // Running weighted y-direction.
+    FFX_PARAMETER_INOUT FfxFloat16x2 lenP,   // Running weighted length.
+    FFX_PARAMETER_IN FfxFloat16x2 pp,        // Fractional position used for the bilinear weights.
+    FFX_PARAMETER_IN FfxBoolean biST,        // Weight the pair by (1 - pp.y).
+    FFX_PARAMETER_IN FfxBoolean biUV,        // Weight the pair by pp.y.
+    FFX_PARAMETER_IN FfxFloat16x2 lA,        // Lumas above the two centres.
+    FFX_PARAMETER_IN FfxFloat16x2 lB,        // Lumas left of the two centres.
+    FFX_PARAMETER_IN FfxFloat16x2 lC,        // Lumas of the two centres.
+    FFX_PARAMETER_IN FfxFloat16x2 lD,        // Lumas right of the two centres.
+    FFX_PARAMETER_IN FfxFloat16x2 lE)        // Lumas below the two centres.
+{
+    FfxFloat16x2 w = FFX_BROADCAST_FLOAT16X2(0.0);
+    
+    if (biST)
+        w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(FFX_BROADCAST_FLOAT16(1.0) - pp.y);
+
+    if (biUV)
+        w = (FfxFloat16x2(1.0, 0.0) + FfxFloat16x2(-pp.x, pp.x)) * FFX_BROADCAST_FLOAT16X2(pp.y);  // (1 - x, x) * y; overrides the biST weight if both flags are set.
+
+    // ABS is not free in the packed FP16 path.
+    FfxFloat16x2 dc   = lD - lC;
+    FfxFloat16x2 cb   = lC - lB;
+    FfxFloat16x2 lenX = max(abs(dc), abs(cb));
+    lenX              = ffxReciprocalHalf(lenX);
+
+    FfxFloat16x2 dirX = lD - lB;
+    dirPX += dirX * w;
+    lenX = ffxSaturate(abs(dirX) * lenX);
+    lenX *= lenX;
+    lenP += lenX * w;
+    FfxFloat16x2 ec   = lE - lC;  // Repeat for the y axis.
+    FfxFloat16x2 ca   = lC - lA;
+    FfxFloat16x2 lenY = max(abs(ec), abs(ca));
+    lenY              = ffxReciprocalHalf(lenY);
+    FfxFloat16x2 dirY = lE - lA;
+    dirPY += dirY * w;
+    lenY = ffxSaturate(abs(dirY) * lenY);
+    lenY *= lenY;
+    lenP += lenY * w;
+}
+
+void FsrEasuH(  // Packed FP16 counterpart of ffxFsrEasuFloat: same 12-tap kernel, two taps processed per operation.
+    FFX_PARAMETER_OUT FfxFloat16x3 pix,    // The computed color of a pixel.
+    FFX_PARAMETER_IN FfxUInt32x2 ip,       // Integer pixel position within the output.
+    FFX_PARAMETER_IN FfxUInt32x4 con0,     // Constants as produced by ffxFsrPopulateEasuConstants.
+    FFX_PARAMETER_IN FfxUInt32x4 con1,
+    FFX_PARAMETER_IN FfxUInt32x4 con2,
+    FFX_PARAMETER_IN FfxUInt32x4 con3)
+{
+    FfxFloat32x2 pp = FfxFloat32x2(ip) * ffxAsFloat(con0.xy) + ffxAsFloat(con0.zw);
+    FfxFloat32x2 fp = floor(pp);
+    pp -= fp;
+    FfxFloat16x2 ppp = FfxFloat16x2(pp);  // Fractional position converted to FP16 for the packed math below.
+
+    FfxFloat32x2 p0    = fp * ffxAsFloat(con1.xy) + ffxAsFloat(con1.zw);
+    FfxFloat32x2 p1    = p0 + ffxAsFloat(con2.xy);
+    FfxFloat32x2 p2    = p0 + ffxAsFloat(con2.zw);
+    FfxFloat32x2 p3    = p0 + ffxAsFloat(con3.xy);
+    FfxFloat16x4 bczzR = FsrEasuRH(p0);
+    FfxFloat16x4 bczzG = FsrEasuGH(p0);
+    FfxFloat16x4 bczzB = FsrEasuBH(p0);
+    FfxFloat16x4 ijfeR = FsrEasuRH(p1);
+    FfxFloat16x4 ijfeG = FsrEasuGH(p1);
+    FfxFloat16x4 ijfeB = FsrEasuBH(p1);
+    FfxFloat16x4 klhgR = FsrEasuRH(p2);
+    FfxFloat16x4 klhgG = FsrEasuGH(p2);
+    FfxFloat16x4 klhgB = FsrEasuBH(p2);
+    FfxFloat16x4 zzonR = FsrEasuRH(p3);
+    FfxFloat16x4 zzonG = FsrEasuGH(p3);
+    FfxFloat16x4 zzonB = FsrEasuBH(p3);
+
+    FfxFloat16x4 bczzL = bczzB * FFX_BROADCAST_FLOAT16X4(0.5) + (bczzR * FFX_BROADCAST_FLOAT16X4(0.5) + bczzG);
+    FfxFloat16x4 ijfeL = ijfeB * FFX_BROADCAST_FLOAT16X4(0.5) + (ijfeR * FFX_BROADCAST_FLOAT16X4(0.5) + ijfeG);
+    FfxFloat16x4 klhgL = klhgB * FFX_BROADCAST_FLOAT16X4(0.5) + (klhgR * FFX_BROADCAST_FLOAT16X4(0.5) + klhgG);
+    FfxFloat16x4 zzonL = zzonB * FFX_BROADCAST_FLOAT16X4(0.5) + (zzonR * FFX_BROADCAST_FLOAT16X4(0.5) + zzonG);
+    FfxFloat16   bL    = bczzL.x;
+    FfxFloat16   cL    = bczzL.y;
+    FfxFloat16   iL    = ijfeL.x;
+    FfxFloat16   jL    = ijfeL.y;
+    FfxFloat16   fL    = ijfeL.z;
+    FfxFloat16   eL    = ijfeL.w;
+    FfxFloat16   kL    = klhgL.x;
+    FfxFloat16   lL    = klhgL.y;
+    FfxFloat16   hL    = klhgL.z;
+    FfxFloat16   gL    = klhgL.w;
+    FfxFloat16   oL    = zzonL.z;
+    FfxFloat16   nL    = zzonL.w;
+
+    // This part is different, accumulating 2 taps in parallel.
+    FfxFloat16x2 dirPX = FFX_BROADCAST_FLOAT16X2(0.0);
+    FfxFloat16x2 dirPY = FFX_BROADCAST_FLOAT16X2(0.0);
+    FfxFloat16x2 lenP  = FFX_BROADCAST_FLOAT16X2(0.0);
+    FsrEasuSetH(dirPX,
+                dirPY,
+                lenP,
+                ppp,
+                FFX_TRUE,   // biST: centres f and g. Same boolean constants as the FP32 path.
+                FFX_FALSE,  // biUV
+                FfxFloat16x2(bL, cL),
+                FfxFloat16x2(eL, fL),
+                FfxFloat16x2(fL, gL),
+                FfxFloat16x2(gL, hL),
+                FfxFloat16x2(jL, kL));
+    FsrEasuSetH(dirPX,
+                dirPY,
+                lenP,
+                ppp,
+                FFX_FALSE,  // biST
+                FFX_TRUE,   // biUV: centres j and k.
+                FfxFloat16x2(fL, gL),
+                FfxFloat16x2(iL, jL),
+                FfxFloat16x2(jL, kL),
+                FfxFloat16x2(kL, lL),
+                FfxFloat16x2(nL, oL));
+    FfxFloat16x2 dir = FfxFloat16x2(dirPX.r + dirPX.g, dirPY.r + dirPY.g);  // Fold the two partial sums together.
+    FfxFloat16   len = lenP.r + lenP.g;
+
+    FfxFloat16x2 dir2 = dir * dir;
+    FfxFloat16   dirR = dir2.x + dir2.y;
+    FfxBoolean   zro  = FfxBoolean(dirR < FFX_BROADCAST_FLOAT16(1.0 / 32768.0));
+    dirR              = ffxApproximateReciprocalSquareRootHalf(dirR);
+    dirR              = zro ? FFX_BROADCAST_FLOAT16(1.0) : dirR;   // Near-zero gradient: fall back to the unit direction (1, 0).
+    dir.x             = zro ? FFX_BROADCAST_FLOAT16(1.0) : dir.x;
+    dir *= FFX_BROADCAST_FLOAT16X2(dirR);
+    len = len * FFX_BROADCAST_FLOAT16(0.5);
+    len *= len;
+    FfxFloat16   stretch = (dir.x * dir.x + dir.y * dir.y) * ffxApproximateReciprocalHalf(max(abs(dir.x), abs(dir.y)));
+    FfxFloat16x2 len2 =
+        FfxFloat16x2(FFX_BROADCAST_FLOAT16(1.0) + (stretch - FFX_BROADCAST_FLOAT16(1.0)) * len, FFX_BROADCAST_FLOAT16(1.0) + FFX_BROADCAST_FLOAT16(-0.5) * len);
+    FfxFloat16 lob = FFX_BROADCAST_FLOAT16(0.5) + FFX_BROADCAST_FLOAT16((1.0 / 4.0 - 0.04) - 0.5) * len;
+    FfxFloat16 clp = ffxApproximateReciprocalHalf(lob);
+
+    // FP16 is different, using packed trick to do min and max in same operation.
+    FfxFloat16x2 bothR =
+        max(max(FfxFloat16x2(-ijfeR.z, ijfeR.z), FfxFloat16x2(-klhgR.w, klhgR.w)), max(FfxFloat16x2(-ijfeR.y, ijfeR.y), FfxFloat16x2(-klhgR.x, klhgR.x)));
+    FfxFloat16x2 bothG =
+        max(max(FfxFloat16x2(-ijfeG.z, ijfeG.z), FfxFloat16x2(-klhgG.w, klhgG.w)), max(FfxFloat16x2(-ijfeG.y, ijfeG.y), FfxFloat16x2(-klhgG.x, klhgG.x)));
+    FfxFloat16x2 bothB =
+        max(max(FfxFloat16x2(-ijfeB.z, ijfeB.z), FfxFloat16x2(-klhgB.w, klhgB.w)), max(FfxFloat16x2(-ijfeB.y, ijfeB.y), FfxFloat16x2(-klhgB.x, klhgB.x)));
+
+    // This part is different for FP16, working pairs of taps at a time.
+    FfxFloat16x2 pR = FFX_BROADCAST_FLOAT16X2(0.0);
+    FfxFloat16x2 pG = FFX_BROADCAST_FLOAT16X2(0.0);
+    FfxFloat16x2 pB = FFX_BROADCAST_FLOAT16X2(0.0);
+    FfxFloat16x2 pW = FFX_BROADCAST_FLOAT16X2(0.0);
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, 1.0) - ppp.xx, FfxFloat16x2(-1.0, -1.0) - ppp.yy, dir, len2, lob, clp, bczzR.xy, bczzG.xy, bczzB.xy);  // b, c
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(-1.0, 0.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, ijfeR.xy, ijfeG.xy, ijfeB.xy);  // i, j
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(0.0, -1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, ijfeR.zw, ijfeG.zw, ijfeB.zw);  // f, e
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 2.0) - ppp.xx, FfxFloat16x2(1.0, 1.0) - ppp.yy, dir, len2, lob, clp, klhgR.xy, klhgG.xy, klhgB.xy);  // k, l
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(2.0, 1.0) - ppp.xx, FfxFloat16x2(0.0, 0.0) - ppp.yy, dir, len2, lob, clp, klhgR.zw, klhgG.zw, klhgB.zw);  // h, g
+    FsrEasuTapH(pR, pG, pB, pW, FfxFloat16x2(1.0, 0.0) - ppp.xx, FfxFloat16x2(2.0, 2.0) - ppp.yy, dir, len2, lob, clp, zzonR.zw, zzonG.zw, zzonB.zw);  // o, n
+    FfxFloat16x3 aC = FfxFloat16x3(pR.x + pR.y, pG.x + pG.y, pB.x + pB.y);
+    FfxFloat16   aW = pW.x + pW.y;
+
+    // Slightly different for FP16 version due to combined min and max.
+    pix = min(FfxFloat16x3(bothR.y, bothG.y, bothB.y), max(-FfxFloat16x3(bothR.x, bothG.x, bothB.x), aC * FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(aW))));  // both*.y = max of the 4 nearest, -both*.x = their min.
+}
+#endif // #if defined(FFX_GPU) && FFX_HALF == 1 && defined(FFX_FSR_EASU_HALF)
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                      FSR - [RCAS] ROBUST CONTRAST ADAPTIVE SHARPENING
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// CAS uses a simplified mechanism to convert local contrast into a variable amount of sharpness.
+// RCAS uses a more exact mechanism, solving for the maximum local sharpness possible before clipping.
+// RCAS also has a built in process to limit sharpening of what it detects as possible noise.
+// RCAS sharpening does not support scaling, as it should be applied after EASU scaling.
+// Pass EASU output straight into RCAS, no color conversions necessary.
+//------------------------------------------------------------------------------------------------------------------------------
+// RCAS is based on the following logic.
+// RCAS uses a 5 tap filter in a cross pattern (same as CAS),
+//    w                n
+//  w 1 w  for taps  w m e 
+//    w                s
+// Where 'w' is the negative lobe weight.
+//  output = (w*(n+e+w+s)+m)/(4*w+1)
+// RCAS solves for 'w' by seeing where the signal might clip out of the {0 to 1} input range,
+//  0 == (w*(n+e+w+s)+m)/(4*w+1) -> w = -m/(n+e+w+s)
+//  1 == (w*(n+e+w+s)+m)/(4*w+1) -> w = (1-m)/(n+e+w+s-4*1)
+// Then chooses the 'w' which results in no clipping, limits 'w', and multiplies by the 'sharp' amount.
+// This solution above has issues with MSAA input as the steps along the gradient cause edge detection issues.
+// So RCAS uses 4x the maximum and 4x the minimum (depending on equation)in place of the individual taps.
+// As well as switching from 'm' to either the minimum or maximum (depending on side), to help in energy conservation.
+// This stabilizes RCAS.
+// RCAS does a simple highpass which is normalized against the local contrast then shaped,
+//       0.25
+//  0.25  -1  0.25
+//       0.25
+// This is used as a noise detection filter, to reduce the effect of RCAS on grain, and focus on real edges.
+//
+//  GLSL example for the required callbacks :
+// 
+//  FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p){return FfxFloat16x4(imageLoad(imgSrc,FfxInt32x2(p)));}
+//  void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b)
+//  {
+//    //do any simple input color conversions here or leave empty if none needed
+//  }
+//  
+//  FsrRcasCon needs to be called from the CPU or GPU to set up constants.
+//  Including a GPU example here, the 'con' value would be stored out to a constant buffer.
+// 
+//  FfxUInt32x4 con;
+//  FsrRcasCon(con,
+//   0.0); // The scale is {0.0 := maximum sharpness, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+// ---------------
+// RCAS sharpening supports a CAS-like pass-through alpha via,
+//  #define FSR_RCAS_PASSTHROUGH_ALPHA 1
+// RCAS also supports a define to enable a more expensive path to avoid some sharpening of noise.
+// Would suggest it is better to apply film grain after RCAS sharpening (and after scaling) instead of using this define,
+//  #define FSR_RCAS_DENOISE 1
+//==============================================================================================================================
+// This is set at the limit of providing unnatural results for sharpening.
+#define FSR_RCAS_LIMIT (0.25-(1.0/16.0))
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                      CONSTANT SETUP
+//==============================================================================================================================
+// Builds the RCAS constant vector from a sharpness setting expressed in stops (intended for CPU or GPU use).
+//  con       - Receives the constants:
+//                [0] = linear sharpness, stored as the bits of a 32-bit float,
+//                [1] = the same linear sharpness packed as two 16-bit halves,
+//                [2] = [3] = 0.
+//  sharpness - The scale is {0.0 := maximum, to N>0, where N is the number of stops (halving) of the reduction of sharpness}.
+// NOTE(review): 'con' carries no explicit out qualifier, so the writes below only reach the caller where
+// FfxUInt32x4 is passed by reference (for example an array type) - confirm before calling this from shader code.
+ FFX_STATIC void FsrRcasCon(FfxUInt32x4 con,
+                            FfxFloat32 sharpness)
+ {
+     // Each stop halves the strength: linear = 2^(-stops).
+     FfxFloat32 linearSharpness = exp2(-sharpness);
+     // Same value in both lanes of the packed half pair.
+     FfxFloat32x2 sharpnessPair = {linearSharpness, linearSharpness};
+
+     con[0] = ffxAsUInt32(linearSharpness);
+     con[1] = packHalf2x16(sharpnessPair);
+     con[2] = 0;
+     con[3] = 0;
+ }
+ ////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                   NON-PACKED 32-BIT VERSION
+//==============================================================================================================================
+#if defined(FFX_GPU)&&defined(FSR_RCAS_F)
+ // Input callback prototypes that need to be implemented by calling shader
+ // FsrRcasLoadF is called with an integer pixel position and its result is read as RGBA.
+ FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p);
+ // FsrRcasInputF is invoked once per loaded tap and may modify the tap's R, G, B in place.
+ void FsrRcasInputF(inout FfxFloat32 r,inout FfxFloat32 g,inout FfxFloat32 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ // 32-bit RCAS filter for one output pixel.
+ // Loads the 5-tap cross around 'ip' through FsrRcasLoadF(), solves for the negative lobe weight that
+ // avoids clipping, scales it by the sharpness stored in con.x and resolves the sharpened color.
+ //  pixR/pixG/pixB - Sharpened output color.
+ //  pixA           - Only with FSR_RCAS_PASSTHROUGH_ALPHA: alpha of the center tap, passed through unchanged.
+ //  ip             - Integer pixel position in output.
+ //  con            - Constants; only con.x is read here (reinterpreted as a 32-bit float).
+ void FsrRcasF(out FfxFloat32 pixR,  // Output values, non-vector so port between FsrRcasF() and FsrRcasH() is easy.
+               out FfxFloat32 pixG,
+               out FfxFloat32 pixB,
+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+               out FfxFloat32 pixA,
+#endif
+               FfxUInt32x2 ip,  // Integer pixel position in output.
+               FfxUInt32x4 con)
+ {  // Constant generated by FsrRcasCon().
+     // Algorithm uses minimal 3x3 pixel neighborhood.
+     //    b
+     //  d e f
+     //    h
+     FfxInt32x2   sp = FfxInt32x2(ip);
+     FfxFloat32x3 b  = FsrRcasLoadF(sp + FfxInt32x2(0, -1)).rgb;
+     FfxFloat32x3 d  = FsrRcasLoadF(sp + FfxInt32x2(-1, 0)).rgb;
+#ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+     // Keep the full center texel so its alpha can be forwarded untouched.
+     FfxFloat32x4 ee = FsrRcasLoadF(sp);
+     FfxFloat32x3 e  = ee.rgb;
+     pixA            = ee.a;
+#else
+     FfxFloat32x3 e = FsrRcasLoadF(sp).rgb;
+#endif
+     FfxFloat32x3 f = FsrRcasLoadF(sp + FfxInt32x2(1, 0)).rgb;
+     FfxFloat32x3 h = FsrRcasLoadF(sp + FfxInt32x2(0, 1)).rgb;
+     // Rename (32-bit) or regroup (16-bit).
+     FfxFloat32 bR = b.r;
+     FfxFloat32 bG = b.g;
+     FfxFloat32 bB = b.b;
+     FfxFloat32 dR = d.r;
+     FfxFloat32 dG = d.g;
+     FfxFloat32 dB = d.b;
+     FfxFloat32 eR = e.r;
+     FfxFloat32 eG = e.g;
+     FfxFloat32 eB = e.b;
+     FfxFloat32 fR = f.r;
+     FfxFloat32 fG = f.g;
+     FfxFloat32 fB = f.b;
+     FfxFloat32 hR = h.r;
+     FfxFloat32 hG = h.g;
+     FfxFloat32 hB = h.b;
+     // Run optional input transform.
+     FsrRcasInputF(bR, bG, bB);
+     FsrRcasInputF(dR, dG, dB);
+     FsrRcasInputF(eR, eG, eB);
+     FsrRcasInputF(fR, fG, fB);
+     FsrRcasInputF(hR, hG, hB);
+     // Luma times 2 (0.5*B + 0.5*R + G).
+     FfxFloat32 bL = bB * FfxFloat32(0.5) + (bR * FfxFloat32(0.5) + bG);
+     FfxFloat32 dL = dB * FfxFloat32(0.5) + (dR * FfxFloat32(0.5) + dG);
+     FfxFloat32 eL = eB * FfxFloat32(0.5) + (eR * FfxFloat32(0.5) + eG);
+     FfxFloat32 fL = fB * FfxFloat32(0.5) + (fR * FfxFloat32(0.5) + fG);
+     FfxFloat32 hL = hB * FfxFloat32(0.5) + (hR * FfxFloat32(0.5) + hG);
+     // Noise detection: highpass of the luma cross (average of the ring minus the center),
+     // normalized by the luma range of all five taps.
+     FfxFloat32 nz = FfxFloat32(0.25) * bL + FfxFloat32(0.25) * dL + FfxFloat32(0.25) * fL + FfxFloat32(0.25) * hL - eL;
+     nz            = ffxSaturate(abs(nz) * ffxApproximateReciprocalMedium(ffxMax3(ffxMax3(bL, dL, eL), fL, hL) - ffxMin3(ffxMin3(bL, dL, eL), fL, hL)));
+     // Remap the saturated {0 to 1} measure to a lobe multiplier of {1.0 down to 0.5}.
+     nz            = FfxFloat32(-0.5) * nz + FfxFloat32(1.0);
+     // Min and max of ring (the four outer taps, center excluded).
+     FfxFloat32 mn4R = ffxMin(ffxMin3(bR, dR, fR), hR);
+     FfxFloat32 mn4G = ffxMin(ffxMin3(bG, dG, fG), hG);
+     FfxFloat32 mn4B = ffxMin(ffxMin3(bB, dB, fB), hB);
+     FfxFloat32 mx4R = max(ffxMax3(bR, dR, fR), hR);
+     FfxFloat32 mx4G = max(ffxMax3(bG, dG, fG), hG);
+     FfxFloat32 mx4B = max(ffxMax3(bB, dB, fB), hB);
+     // Immediate constants for peak range: x = upper signal limit (1.0), y = -4 * that limit.
+     FfxFloat32x2 peakC = FfxFloat32x2(1.0, -1.0 * 4.0);
+     // Limiters, these need to be high precision RCPs.
+     // hitMin: ring minimum over 4x ring maximum (lower clip bound, negated below).
+     // hitMax: (limit - ring maximum) over (4x ring minimum - 4 * limit) (upper clip bound).
+     FfxFloat32 hitMinR = mn4R * rcp(FfxFloat32(4.0) * mx4R);
+     FfxFloat32 hitMinG = mn4G * rcp(FfxFloat32(4.0) * mx4G);
+     FfxFloat32 hitMinB = mn4B * rcp(FfxFloat32(4.0) * mx4B);
+     FfxFloat32 hitMaxR = (peakC.x - mx4R) * rcp(FfxFloat32(4.0) * mn4R + peakC.y);
+     FfxFloat32 hitMaxG = (peakC.x - mx4G) * rcp(FfxFloat32(4.0) * mn4G + peakC.y);
+     FfxFloat32 hitMaxB = (peakC.x - mx4B) * rcp(FfxFloat32(4.0) * mn4B + peakC.y);
+     // Per channel, keep the larger (less negative) of the two bounds.
+     FfxFloat32 lobeR   = max(-hitMinR, hitMaxR);
+     FfxFloat32 lobeG   = max(-hitMinG, hitMaxG);
+     FfxFloat32 lobeB   = max(-hitMinB, hitMaxB);
+     // Take the largest lobe across channels, clamp it to {-FSR_RCAS_LIMIT to 0}, then scale by the sharpness in con.x.
+     FfxFloat32 lobe    = max(FfxFloat32(-FSR_RCAS_LIMIT), ffxMin(ffxMax3(lobeR, lobeG, lobeB), FfxFloat32(0.0))) * ffxAsFloat
+     (con.x);
+ // Apply noise removal.
+#ifdef FSR_RCAS_DENOISE
+     lobe *= nz;
+#endif
+     // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+     // output = (lobe * (b + d + h + f) + e) / (4 * lobe + 1)
+     FfxFloat32 rcpL = ffxApproximateReciprocalMedium(FfxFloat32(4.0) * lobe + FfxFloat32(1.0));
+     pixR            = (lobe * bR + lobe * dR + lobe * hR + lobe * fR + eR) * rcpL;
+     pixG            = (lobe * bG + lobe * dG + lobe * hG + lobe * fG + eG) * rcpL;
+     pixB            = (lobe * bB + lobe * dB + lobe * hB + lobe * fB + eB) * rcpL;
+     return;
+ }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                  NON-PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(FFX_GPU) && FFX_HALF == 1 && defined(FSR_RCAS_H)
+ // Input callback prototypes that need to be implemented by calling shader
+ // FsrRcasLoadH is called with an integer pixel position and its result is read as RGBA.
+ FfxFloat16x4 FsrRcasLoadH(FfxInt16x2 p);
+ // FsrRcasInputH is invoked once per loaded tap and may modify the tap's R, G, B in place.
+ void FsrRcasInputH(inout FfxFloat16 r,inout FfxFloat16 g,inout FfxFloat16 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ // Non-packed 16-bit RCAS filter for one output pixel.
+ // Loads the 5-tap cross around 'ip' through FsrRcasLoadH(), solves for the negative lobe weight that
+ // avoids clipping, scales it by the half-precision sharpness stored in con.y and resolves the sharpened color.
+ //  pixR/pixG/pixB - Sharpened output color.
+ //  pixA           - Only with FSR_RCAS_PASSTHROUGH_ALPHA: alpha of the center tap, passed through unchanged.
+ //  ip             - Integer pixel position in output.
+ //  con            - Constants; only con.y is read here (unpacked as two halves, .x lane used).
+ void FsrRcasH(
+ out FfxFloat16 pixR, // Output values, non-vector so port between FsrRcasF() and FsrRcasH() is easy.
+ out FfxFloat16 pixG,
+ out FfxFloat16 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+  out FfxFloat16 pixA,
+ #endif
+ FfxUInt32x2 ip, // Integer pixel position in output.
+ FfxUInt32x4 con){ // Constant generated by FsrRcasCon().
+  // Sharpening algorithm uses minimal 3x3 pixel neighborhood.
+  //    b
+  //  d e f
+  //    h
+  FfxInt16x2 sp=FfxInt16x2(ip);
+  FfxFloat16x3 b=FsrRcasLoadH(sp+FfxInt16x2( 0,-1)).rgb;
+  FfxFloat16x3 d=FsrRcasLoadH(sp+FfxInt16x2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+   // Keep the full center texel so its alpha can be forwarded untouched.
+   FfxFloat16x4 ee=FsrRcasLoadH(sp);
+   FfxFloat16x3 e=ee.rgb;pixA=ee.a;
+  #else
+   FfxFloat16x3 e=FsrRcasLoadH(sp).rgb;
+  #endif
+  FfxFloat16x3 f=FsrRcasLoadH(sp+FfxInt16x2( 1, 0)).rgb;
+  FfxFloat16x3 h=FsrRcasLoadH(sp+FfxInt16x2( 0, 1)).rgb;
+  // Rename (32-bit) or regroup (16-bit).
+  FfxFloat16 bR=b.r;
+  FfxFloat16 bG=b.g;
+  FfxFloat16 bB=b.b;
+  FfxFloat16 dR=d.r;
+  FfxFloat16 dG=d.g;
+  FfxFloat16 dB=d.b;
+  FfxFloat16 eR=e.r;
+  FfxFloat16 eG=e.g;
+  FfxFloat16 eB=e.b;
+  FfxFloat16 fR=f.r;
+  FfxFloat16 fG=f.g;
+  FfxFloat16 fB=f.b;
+  FfxFloat16 hR=h.r;
+  FfxFloat16 hG=h.g;
+  FfxFloat16 hB=h.b;
+  // Run optional input transform.
+  FsrRcasInputH(bR,bG,bB);
+  FsrRcasInputH(dR,dG,dB);
+  FsrRcasInputH(eR,eG,eB);
+  FsrRcasInputH(fR,fG,fB);
+  FsrRcasInputH(hR,hG,hB);
+  // Luma times 2 (0.5*B + 0.5*R + G).
+  FfxFloat16 bL=bB*FFX_BROADCAST_FLOAT16(0.5)+(bR*FFX_BROADCAST_FLOAT16(0.5)+bG);
+  FfxFloat16 dL=dB*FFX_BROADCAST_FLOAT16(0.5)+(dR*FFX_BROADCAST_FLOAT16(0.5)+dG);
+  FfxFloat16 eL=eB*FFX_BROADCAST_FLOAT16(0.5)+(eR*FFX_BROADCAST_FLOAT16(0.5)+eG);
+  FfxFloat16 fL=fB*FFX_BROADCAST_FLOAT16(0.5)+(fR*FFX_BROADCAST_FLOAT16(0.5)+fG);
+  FfxFloat16 hL=hB*FFX_BROADCAST_FLOAT16(0.5)+(hR*FFX_BROADCAST_FLOAT16(0.5)+hG);
+  // Noise detection: highpass of the luma cross (average of the ring minus the center),
+  // normalized by the luma range of all five taps.
+  FfxFloat16 nz=FFX_BROADCAST_FLOAT16(0.25)*bL+FFX_BROADCAST_FLOAT16(0.25)*dL+FFX_BROADCAST_FLOAT16(0.25)*fL+FFX_BROADCAST_FLOAT16(0.25)*hL-eL;
+  nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)));
+  // Remap the saturated {0 to 1} measure to a lobe multiplier of {1.0 down to 0.5}.
+  nz=FFX_BROADCAST_FLOAT16(-0.5)*nz+FFX_BROADCAST_FLOAT16(1.0);
+  // Min and max of ring (the four outer taps, center excluded).
+  FfxFloat16 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
+  FfxFloat16 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
+  FfxFloat16 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
+  FfxFloat16 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
+  FfxFloat16 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
+  FfxFloat16 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
+  // Immediate constants for peak range: x = upper signal limit (1.0), y = -4 * that limit.
+  FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
+  // Limiters, these need to be high precision RCPs.
+  // hitMin: ring minimum over 4x ring maximum (lower clip bound, negated below).
+  // hitMax: (limit - ring maximum) over (4x ring minimum - 4 * limit) (upper clip bound).
+  FfxFloat16 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4R);
+  FfxFloat16 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4G);
+  FfxFloat16 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mx4B);
+  FfxFloat16 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4R+peakC.y);
+  FfxFloat16 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4G+peakC.y);
+  FfxFloat16 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16(4.0)*mn4B+peakC.y);
+  // Per channel, keep the larger (less negative) of the two bounds.
+  FfxFloat16 lobeR=max(-hitMinR,hitMaxR);
+  FfxFloat16 lobeG=max(-hitMinG,hitMaxG);
+  FfxFloat16 lobeB=max(-hitMinB,hitMaxB);
+  // Take the largest lobe across channels, clamp it to {-FSR_RCAS_LIMIT to 0}, then scale by the sharpness in con.y.
+  FfxFloat16 lobe=max(FFX_BROADCAST_FLOAT16(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16(0.0)))*FFX_UINT32_TO_FLOAT16X2(con.y).x;
+  // Apply noise removal.
+  #ifdef FSR_RCAS_DENOISE
+   lobe*=nz;
+  #endif
+  // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+  // output = (lobe * (b + d + h + f) + e) / (4 * lobe + 1)
+  FfxFloat16 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16(4.0)*lobe+FFX_BROADCAST_FLOAT16(1.0));
+  pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+  pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+  pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;
+}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//                                                     PACKED 16-BIT VERSION
+//==============================================================================================================================
+#if defined(FFX_GPU)&& FFX_HALF == 1 && defined(FSR_RCAS_HX2)
+ // Input callback prototypes that need to be implemented by the calling shader
+ FfxFloat16x4 FsrRcasLoadHx2(FfxInt16x2 p);
+ void FsrRcasInputHx2(inout FfxFloat16x2 r,inout FfxFloat16x2 g,inout FfxFloat16x2 b);
+//------------------------------------------------------------------------------------------------------------------------------
+ // Converts packed per-channel pairs (Structures of Arrays) back into two per-pixel colors
+ // (Arrays of Structures), ready for store.
+ //  pix0 - Receives the .x lanes of pixR/pixG/pixB in its rgb.
+ //  pix1 - Receives the .y lanes of pixR/pixG/pixB in its rgb.
+ // Alpha of both outputs is written (as 0.0) only when FFX_HLSL is defined.
+ void FsrRcasDepackHx2(out FfxFloat16x4 pix0,out FfxFloat16x4 pix1,FfxFloat16x2 pixR,FfxFloat16x2 pixG,FfxFloat16x2 pixB)
+ {
+  #ifdef FFX_HLSL
+     // Invoke a slower path for DX only, since it won't allow uninitialized values.
+     pix1.a = 0.0;
+     pix0.a = pix1.a;
+  #endif
+     // First pixel of the pair.
+     pix0.rgb = FfxFloat16x3(pixR.x, pixG.x, pixB.x);
+     // Second pixel of the pair.
+     pix1.rgb = FfxFloat16x3(pixR.y, pixG.y, pixB.y);
+ }
+//------------------------------------------------------------------------------------------------------------------------------
+ // Packed 16-bit RCAS filter: sharpens two pixels per call, 'ip' and the pixel 8 columns to its right.
+ // Each pixel's 5-tap cross is loaded through FsrRcasLoadHx2(), the taps are regrouped so that lane .x holds
+ // the first pixel and lane .y the second, and the same RCAS math is then evaluated on both lanes at once.
+ // Sharpness is taken from the half pair packed in con.y (.x lane, broadcast to both pixels).
+ void FsrRcasHx2(
+ // Output values are for 2 8x8 tiles in a 16x8 region.
+ //  pix<R,G,B>.x =  left 8x8 tile
+ //  pix<R,G,B>.y = right 8x8 tile
+ // This enables later processing to easily be packed as well.
+ out FfxFloat16x2 pixR,
+ out FfxFloat16x2 pixG,
+ out FfxFloat16x2 pixB,
+ #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+  // Center-tap alpha of both pixels, passed through unchanged.
+  out FfxFloat16x2 pixA,
+ #endif
+ FfxUInt32x2 ip, // Integer pixel position in output.
+ FfxUInt32x4 con){ // Constant generated by FsrRcasCon().
+  // Sharpening algorithm uses minimal 3x3 pixel neighborhood (loaded once per pixel of the pair).
+  // First pixel: cross around 'ip'.
+  FfxInt16x2 sp0=FfxInt16x2(ip);
+  FfxFloat16x3 b0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0,-1)).rgb;
+  FfxFloat16x3 d0=FsrRcasLoadHx2(sp0+FfxInt16x2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+   FfxFloat16x4 ee0=FsrRcasLoadHx2(sp0);
+   FfxFloat16x3 e0=ee0.rgb;pixA.r=ee0.a;
+  #else
+   FfxFloat16x3 e0=FsrRcasLoadHx2(sp0).rgb;
+  #endif
+  FfxFloat16x3 f0=FsrRcasLoadHx2(sp0+FfxInt16x2( 1, 0)).rgb;
+  FfxFloat16x3 h0=FsrRcasLoadHx2(sp0+FfxInt16x2( 0, 1)).rgb;
+  // Second pixel: same cross, 8 pixels to the right.
+  FfxInt16x2 sp1=sp0+FfxInt16x2(8,0);
+  FfxFloat16x3 b1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0,-1)).rgb;
+  FfxFloat16x3 d1=FsrRcasLoadHx2(sp1+FfxInt16x2(-1, 0)).rgb;
+  #ifdef FSR_RCAS_PASSTHROUGH_ALPHA
+   FfxFloat16x4 ee1=FsrRcasLoadHx2(sp1);
+   FfxFloat16x3 e1=ee1.rgb;pixA.g=ee1.a;
+  #else
+   FfxFloat16x3 e1=FsrRcasLoadHx2(sp1).rgb;
+  #endif
+  FfxFloat16x3 f1=FsrRcasLoadHx2(sp1+FfxInt16x2( 1, 0)).rgb;
+  FfxFloat16x3 h1=FsrRcasLoadHx2(sp1+FfxInt16x2( 0, 1)).rgb;
+  // Arrays of Structures to Structures of Arrays conversion.
+  FfxFloat16x2 bR=FfxFloat16x2(b0.r,b1.r);
+  FfxFloat16x2 bG=FfxFloat16x2(b0.g,b1.g);
+  FfxFloat16x2 bB=FfxFloat16x2(b0.b,b1.b);
+  FfxFloat16x2 dR=FfxFloat16x2(d0.r,d1.r);
+  FfxFloat16x2 dG=FfxFloat16x2(d0.g,d1.g);
+  FfxFloat16x2 dB=FfxFloat16x2(d0.b,d1.b);
+  FfxFloat16x2 eR=FfxFloat16x2(e0.r,e1.r);
+  FfxFloat16x2 eG=FfxFloat16x2(e0.g,e1.g);
+  FfxFloat16x2 eB=FfxFloat16x2(e0.b,e1.b);
+  FfxFloat16x2 fR=FfxFloat16x2(f0.r,f1.r);
+  FfxFloat16x2 fG=FfxFloat16x2(f0.g,f1.g);
+  FfxFloat16x2 fB=FfxFloat16x2(f0.b,f1.b);
+  FfxFloat16x2 hR=FfxFloat16x2(h0.r,h1.r);
+  FfxFloat16x2 hG=FfxFloat16x2(h0.g,h1.g);
+  FfxFloat16x2 hB=FfxFloat16x2(h0.b,h1.b);
+  // Run optional input transform.
+  FsrRcasInputHx2(bR,bG,bB);
+  FsrRcasInputHx2(dR,dG,dB);
+  FsrRcasInputHx2(eR,eG,eB);
+  FsrRcasInputHx2(fR,fG,fB);
+  FsrRcasInputHx2(hR,hG,hB);
+  // Luma times 2 (0.5*B + 0.5*R + G).
+  FfxFloat16x2 bL=bB*FFX_BROADCAST_FLOAT16X2(0.5)+(bR*FFX_BROADCAST_FLOAT16X2(0.5)+bG);
+  FfxFloat16x2 dL=dB*FFX_BROADCAST_FLOAT16X2(0.5)+(dR*FFX_BROADCAST_FLOAT16X2(0.5)+dG);
+  FfxFloat16x2 eL=eB*FFX_BROADCAST_FLOAT16X2(0.5)+(eR*FFX_BROADCAST_FLOAT16X2(0.5)+eG);
+  FfxFloat16x2 fL=fB*FFX_BROADCAST_FLOAT16X2(0.5)+(fR*FFX_BROADCAST_FLOAT16X2(0.5)+fG);
+  FfxFloat16x2 hL=hB*FFX_BROADCAST_FLOAT16X2(0.5)+(hR*FFX_BROADCAST_FLOAT16X2(0.5)+hG);
+  // Noise detection: highpass of the luma cross (average of the ring minus the center),
+  // normalized by the luma range of all five taps.
+  FfxFloat16x2 nz=FFX_BROADCAST_FLOAT16X2(0.25)*bL+FFX_BROADCAST_FLOAT16X2(0.25)*dL+FFX_BROADCAST_FLOAT16X2(0.25)*fL+FFX_BROADCAST_FLOAT16X2(0.25)*hL-eL;
+  nz=ffxSaturate(abs(nz)*ffxApproximateReciprocalMediumHalf(ffxMax3Half(ffxMax3Half(bL,dL,eL),fL,hL)-ffxMin3Half(ffxMin3Half(bL,dL,eL),fL,hL)));
+  // Remap the saturated {0 to 1} measure to a lobe multiplier of {1.0 down to 0.5}.
+  nz=FFX_BROADCAST_FLOAT16X2(-0.5)*nz+FFX_BROADCAST_FLOAT16X2(1.0);
+  // Min and max of ring (the four outer taps, center excluded).
+  FfxFloat16x2 mn4R=min(ffxMin3Half(bR,dR,fR),hR);
+  FfxFloat16x2 mn4G=min(ffxMin3Half(bG,dG,fG),hG);
+  FfxFloat16x2 mn4B=min(ffxMin3Half(bB,dB,fB),hB);
+  FfxFloat16x2 mx4R=max(ffxMax3Half(bR,dR,fR),hR);
+  FfxFloat16x2 mx4G=max(ffxMax3Half(bG,dG,fG),hG);
+  FfxFloat16x2 mx4B=max(ffxMax3Half(bB,dB,fB),hB);
+  // Immediate constants for peak range: x = upper signal limit (1.0), y = -4 * that limit.
+  FfxFloat16x2 peakC=FfxFloat16x2(1.0,-1.0*4.0);
+  // Limiters, these need to be high precision RCPs.
+  // hitMin: ring minimum over 4x ring maximum (lower clip bound, negated below).
+  // hitMax: (limit - ring maximum) over (4x ring minimum - 4 * limit) (upper clip bound).
+  FfxFloat16x2 hitMinR=mn4R*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4R);
+  FfxFloat16x2 hitMinG=mn4G*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4G);
+  FfxFloat16x2 hitMinB=mn4B*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mx4B);
+  FfxFloat16x2 hitMaxR=(peakC.x-mx4R)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4R+peakC.y);
+  FfxFloat16x2 hitMaxG=(peakC.x-mx4G)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4G+peakC.y);
+  FfxFloat16x2 hitMaxB=(peakC.x-mx4B)*ffxReciprocalHalf(FFX_BROADCAST_FLOAT16X2(4.0)*mn4B+peakC.y);
+  // Per channel, keep the larger (less negative) of the two bounds.
+  FfxFloat16x2 lobeR=max(-hitMinR,hitMaxR);
+  FfxFloat16x2 lobeG=max(-hitMinG,hitMaxG);
+  FfxFloat16x2 lobeB=max(-hitMinB,hitMaxB);
+  // Take the largest lobe across channels, clamp it to {-FSR_RCAS_LIMIT to 0}, then scale by the sharpness in con.y.
+  FfxFloat16x2 lobe=max(FFX_BROADCAST_FLOAT16X2(-FSR_RCAS_LIMIT),min(ffxMax3Half(lobeR,lobeG,lobeB),FFX_BROADCAST_FLOAT16X2(0.0)))*FFX_BROADCAST_FLOAT16X2(FFX_UINT32_TO_FLOAT16X2(con.y).x);
+  // Apply noise removal.
+  #ifdef FSR_RCAS_DENOISE
+   lobe*=nz;
+  #endif
+  // Resolve, which needs the medium precision rcp approximation to avoid visible tonality changes.
+  // output = (lobe * (b + d + h + f) + e) / (4 * lobe + 1)
+  FfxFloat16x2 rcpL=ffxApproximateReciprocalMediumHalf(FFX_BROADCAST_FLOAT16X2(4.0)*lobe+FFX_BROADCAST_FLOAT16X2(1.0));
+  pixR=(lobe*bR+lobe*dR+lobe*hR+lobe*fR+eR)*rcpL;
+  pixG=(lobe*bG+lobe*dG+lobe*hG+lobe*fG+eG)*rcpL;
+  pixB=(lobe*bB+lobe*dB+lobe*hB+lobe*fB+eB)*rcpL;}
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                          FSR - [LFGA] LINEAR FILM GRAIN APPLICATOR
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Adding output-resolution film grain after scaling is a good way to mask both rendering and scaling artifacts.
+// Suggest using tiled blue noise as film grain input, with peak noise frequency set for a specific look and feel.
+// The 'Lfga*()' functions provide a convenient way to introduce grain.
+// These functions limit grain based on distance to signal limits.
+// This is done so that the grain is temporally energy preserving, and thus won't modify image tonality.
+// Grain application should be done in a linear colorspace.
+// The grain should be temporally changing, but have a temporal sum per pixel that adds to zero (non-biased).
+//------------------------------------------------------------------------------------------------------------------------------
+// Usage,
+//   FsrLfga*(
+//    color, // In/out linear colorspace color {0 to 1} ranged.
+//    grain, // Per pixel grain texture value {-0.5 to 0.5} ranged, input is 3-channel to support colored grain.
+//    amount); // Amount of grain {0 to 1} ranged.
+//------------------------------------------------------------------------------------------------------------------------------
+// Example if grain texture is monochrome: 'FsrLfgaF(color,ffxBroadcast3(grain),amount)'
+//==============================================================================================================================
+#if defined(FFX_GPU)
+ // Adds film grain to a color, 32-bit version.
+ // The grain is weighted per channel by min(1 - c, c), i.e. the distance to the nearer signal limit,
+ // so the maximum grain is the minimum distance to the signal limit.
+ //  c - In/out color.
+ //  t - Per-channel grain value.
+ //  a - Amount of grain.
+ void FsrLfgaF(inout FfxFloat32x3 c, FfxFloat32x3 t, FfxFloat32 a)
+ {
+     FfxFloat32x3 scaledGrain = t * ffxBroadcast3(a);
+     FfxFloat32x3 headroom    = ffxMin(ffxBroadcast3(1.0) - c, c);
+     c += scaledGrain * headroom;
+ }
+#endif
+//==============================================================================================================================
+#if defined(FFX_GPU)&& FFX_HALF == 1
+ // Adds film grain to a color, half precision version (slower).
+ // The grain is weighted per channel by min(1 - c, c), the distance to the nearer signal limit.
+ //  c - In/out color.
+ //  t - Per-channel grain value.
+ //  a - Amount of grain.
+ void FsrLfgaH(inout FfxFloat16x3 c, FfxFloat16x3 t, FfxFloat16 a)
+ {
+     FfxFloat16x3 scaledGrain = t * FFX_BROADCAST_FLOAT16X3(a);
+     FfxFloat16x3 headroom    = min(FFX_BROADCAST_FLOAT16X3(1.0) - c, c);
+     c += scaledGrain * headroom;
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ // Adds film grain, packed half precision version (faster).
+ // Each channel is a 2-lane vector; every lane is weighted by min(1 - c, c) for that lane.
+ //  cR/cG/cB - In/out color channels.
+ //  tR/tG/tB - Grain values for the matching channel.
+ //  a        - Amount of grain, shared by all channels and lanes.
+ void FsrLfgaHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB,FfxFloat16x2 tR,FfxFloat16x2 tG,FfxFloat16x2 tB,FfxFloat16 a)
+ {
+     // Red.
+     FfxFloat16x2 headroomR = min(FFX_BROADCAST_FLOAT16X2(1.0) - cR, cR);
+     cR += (tR * FFX_BROADCAST_FLOAT16X2(a)) * headroomR;
+     // Green.
+     FfxFloat16x2 headroomG = min(FFX_BROADCAST_FLOAT16X2(1.0) - cG, cG);
+     cG += (tG * FFX_BROADCAST_FLOAT16X2(a)) * headroomG;
+     // Blue.
+     FfxFloat16x2 headroomB = min(FFX_BROADCAST_FLOAT16X2(1.0) - cB, cB);
+     cB += (tB * FFX_BROADCAST_FLOAT16X2(a)) * headroomB;
+ }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                          FSR - [SRTM] SIMPLE REVERSIBLE TONE-MAPPER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// This provides a way to take linear HDR color {0 to FP16_MAX} and convert it into a temporary {0 to 1} ranged post-tonemapped linear.
+// The tonemapper preserves RGB ratio, which helps maintain HDR color bleed during filtering.
+//------------------------------------------------------------------------------------------------------------------------------
+// Reversible tonemapper usage,
+//  FsrSrtm*(color); // {0 to FP16_MAX} converted to {0 to 1}.
+//  FsrSrtmInv*(color); // {0 to 1} converted into {0 to 32768, output peak safe for FP16}.
+//==============================================================================================================================
+#if defined(FFX_GPU)
+ // Forward reversible tone-map, 32-bit version: c = c / (max(c.r, c.g, c.b) + 1).
+ // All three channels share one scale factor, so the RGB ratio is preserved.
+ void FsrSrtmF(inout FfxFloat32x3 c)
+ {
+     FfxFloat32 peak  = ffxMax3(c.r, c.g, c.b);
+     FfxFloat32 scale = rcp(peak + FfxFloat32(1.0));
+     c *= ffxBroadcast3(scale);
+ }
+ // Inverse reversible tone-map, 32-bit version: c = c / max(1/32768, 1 - max(c.r, c.g, c.b)).
+ // The extra max solves the c=1.0 case (which is a /0).
+ void FsrSrtmInvF(inout FfxFloat32x3 c)
+ {
+     FfxFloat32 peak    = ffxMax3(c.r, c.g, c.b);
+     FfxFloat32 divisor = max(FfxFloat32(1.0 / 32768.0), FfxFloat32(1.0) - peak);
+     c *= ffxBroadcast3(rcp(divisor));
+ }
+#endif
+//==============================================================================================================================
+#if defined(FFX_GPU )&& FFX_HALF == 1
+ // Forward reversible tone-map, half precision version: c = c / (max(c.r, c.g, c.b) + 1).
+ void FsrSrtmH(inout FfxFloat16x3 c)
+ {
+     FfxFloat16 peak  = ffxMax3Half(c.r, c.g, c.b);
+     FfxFloat16 scale = ffxReciprocalHalf(peak + FFX_BROADCAST_FLOAT16(1.0));
+     c *= FFX_BROADCAST_FLOAT16X3(scale);
+ }
+ // Inverse reversible tone-map, half precision version: c = c / max(1/32768, 1 - max(c.r, c.g, c.b)).
+ // The lower bound on the divisor avoids a division by zero when the largest channel is 1.0.
+ void FsrSrtmInvH(inout FfxFloat16x3 c)
+ {
+     FfxFloat16 peak    = ffxMax3Half(c.r, c.g, c.b);
+     FfxFloat16 divisor = max(FFX_BROADCAST_FLOAT16(1.0 / 32768.0), FFX_BROADCAST_FLOAT16(1.0) - peak);
+     c *= FFX_BROADCAST_FLOAT16X3(ffxReciprocalHalf(divisor));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ // Forward reversible tone-map, packed half precision version.
+ // Per lane: channel = channel / (max(cR, cG, cB) + 1), with one shared scale for all three channels.
+ void FsrSrtmHx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB)
+ {
+     FfxFloat16x2 peak  = ffxMax3Half(cR, cG, cB);
+     FfxFloat16x2 scale = ffxReciprocalHalf(peak + FFX_BROADCAST_FLOAT16X2(1.0));
+     cR *= scale;
+     cG *= scale;
+     cB *= scale;
+ }
+ // Inverse reversible tone-map, packed half precision version.
+ // Per lane: channel = channel / max(1/32768, 1 - max(cR, cG, cB)).
+ // The lower bound on the divisor avoids a division by zero when the largest channel is 1.0.
+ void FsrSrtmInvHx2(inout FfxFloat16x2 cR,inout FfxFloat16x2 cG,inout FfxFloat16x2 cB)
+ {
+     FfxFloat16x2 peak    = ffxMax3Half(cR, cG, cB);
+     FfxFloat16x2 divisor = max(FFX_BROADCAST_FLOAT16X2(1.0 / 32768.0), FFX_BROADCAST_FLOAT16X2(1.0) - peak);
+     FfxFloat16x2 scale   = ffxReciprocalHalf(divisor);
+     cR *= scale;
+     cG *= scale;
+     cB *= scale;
+ }
+#endif
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+//_____________________________________________________________/\_______________________________________________________________
+//==============================================================================================================================
+//
+//                                       FSR - [TEPD] TEMPORAL ENERGY PRESERVING DITHER
+//
+//------------------------------------------------------------------------------------------------------------------------------
+// Temporally energy preserving dithered {0 to 1} linear to gamma 2.0 conversion.
+// Gamma 2.0 is used so that the conversion back to linear is just to square the color.
+// The conversion comes in 8-bit and 10-bit modes, designed for output to 8-bit UNORM or 10:10:10:2 respectively.
+// Given good non-biased temporal blue noise as dither input,
+// the output dither will temporally conserve energy.
+// This is done by choosing the linear nearest step point instead of perceptual nearest.
+// See code below for details.
+//------------------------------------------------------------------------------------------------------------------------------
+// DX SPEC RULES FOR FLOAT->UNORM 8-BIT CONVERSION
+// ===============================================
+// - Output is 'FfxUInt32(floor(saturate(n)*255.0+0.5))'.
+// - Thus rounding is to nearest.
+// - NaN gets converted to zero.
+// - INF is clamped to {0.0 to 1.0}.
+//==============================================================================================================================
+#if defined(FFX_GPU)
+ // Hand tuned integer position to dither value, with more values than simple checkerboard.
+ // Only 32-bit has enough precision for this computation.
+ //  p - Integer pixel position.
+ //  f - Offset added to p.x before the pattern is evaluated (presumably a frame index - confirm with callers).
+ // Output is {0 to <1}.
+ FfxFloat32 FsrTepdDitF(FfxUInt32x2 p, FfxUInt32 f)
+ {
+     FfxFloat32 column = FfxFloat32(p.x + f);
+     FfxFloat32 row    = FfxFloat32(p.y);
+     // The 1.61803 golden ratio.
+     FfxFloat32 goldenRatio = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+     // Number designed to provide a good visual pattern.
+     FfxFloat32 rowWeight = FfxFloat32(1.0 / 3.69);
+     // Only the fractional part of the weighted sum is kept.
+     return ffxFract(column * goldenRatio + (row * rowWeight));
+ }
+  //------------------------------------------------------------------------------------------------------------------------------
+ // This version is 8-bit gamma 2.0.
+ //  c   - In/out color. The 'c' input is {0 to 1}; output is {0 to 1} ready for image store.
+ //  dit - Dither value compared against the interpolation ratio to pick the lower or upper step.
+ void FsrTepdC8F(inout FfxFloat32x3 c, FfxFloat32 dit)
+ {
+     // Size of one 8-bit output step.
+     FfxFloat32x3 stepSize = ffxBroadcast3(1.0 / 255.0);
+     // Gamma 2.0 value rounded down to the step grid.
+     FfxFloat32x3 lower = floor(ffxSqrt(c) * ffxBroadcast3(255.0)) * stepSize;
+     // Linear values of the lower step and of the step above it.
+     FfxFloat32x3 lowerLinear = lower * lower;
+     FfxFloat32x3 upper       = lower + stepSize;
+     FfxFloat32x3 upperLinear = upper * upper;
+     // Ratio of 'lowerLinear' to 'upperLinear' required to produce 'c'.
+     // ffxApproximateReciprocal() won't work here (at least for very high dynamic ranges).
+     // ffxApproximateReciprocalMedium() is an IADD,FMA,MUL.
+     FfxFloat32x3 ratio = (c - upperLinear) * ffxApproximateReciprocalMedium(lowerLinear - upperLinear);
+     // Use the ratio as a cutoff to choose the lower or the upper step.
+     // ffxIsGreaterThanZero() is a MUL.
+     c = ffxSaturate(lower + ffxIsGreaterThanZero(ffxBroadcast3(dit) - ratio) * stepSize);
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ // This version is 10-bit gamma 2.0.
+ //  c   - In/out color. The 'c' input is {0 to 1}; output is {0 to 1} ready for image store.
+ //  dit - Dither value compared against the interpolation ratio to pick the lower or upper step.
+ void FsrTepdC10F(inout FfxFloat32x3 c, FfxFloat32 dit)
+ {
+     // Size of one 10-bit output step.
+     FfxFloat32x3 stepSize = ffxBroadcast3(1.0 / 1023.0);
+     // Gamma 2.0 value rounded down to the step grid.
+     FfxFloat32x3 lower = floor(ffxSqrt(c) * ffxBroadcast3(1023.0)) * stepSize;
+     // Linear values of the lower step and of the step above it.
+     FfxFloat32x3 lowerLinear = lower * lower;
+     FfxFloat32x3 upper       = lower + stepSize;
+     FfxFloat32x3 upperLinear = upper * upper;
+     // Ratio of 'lowerLinear' to 'upperLinear' required to produce 'c'.
+     FfxFloat32x3 ratio = (c - upperLinear) * ffxApproximateReciprocalMedium(lowerLinear - upperLinear);
+     // Use the ratio as a cutoff to choose the lower or the upper step.
+     c = ffxSaturate(lower + ffxIsGreaterThanZero(ffxBroadcast3(dit) - ratio) * stepSize);
+ }
+#endif
+//==============================================================================================================================
+#if defined(FFX_GPU)&& FFX_HALF == 1
+ FfxFloat16 FsrTepdDitH(FfxUInt32x2 p, FfxUInt32 f)
+ {
+     // The weighted sum is evaluated in 32-bit floats; only the returned fraction is converted to FfxFloat16.
+     FfxFloat32 colWeight = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0);
+     FfxFloat32 rowWeight = FfxFloat32(1.0 / 3.69);
+     FfxFloat32 col       = FfxFloat32(p.x + f);
+     FfxFloat32 row       = FfxFloat32(p.y);
+     return FfxFloat16(ffxFract(col * colWeight + (row * rowWeight)));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC8H(inout FfxFloat16x3 c, FfxFloat16 dit) // 16-bit variant: quantizes 'c' to 1/255 steps in gamma 2.0, using 'dit' as the round-up cutoff.
+ {
+     FfxFloat16x3 n = sqrt(c); // Gamma 2.0 encode.
+     n     = floor(n * FFX_BROADCAST_FLOAT16X3(255.0)) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0); // Round down to a multiple of 1/255.
+     FfxFloat16x3 a = n * n; // Square of the lower step.
+     FfxFloat16x3 b = n + FFX_BROADCAST_FLOAT16X3(1.0 / 255.0); // Next step up ...
+     b     = b * b; // ... and its square.
+     FfxFloat16x3 r = (c - b) * ffxApproximateReciprocalMediumHalf(a - b); // 0 when c == b, 1 when c == a.
+     c     = ffxSaturate(n + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - r) * FFX_BROADCAST_FLOAT16X3(1.0 / 255.0)); // 'n', or one step above it where dit > r.
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC10H(inout FfxFloat16x3 c, FfxFloat16 dit)
+ {
+     FfxFloat16x3 codeStep = FFX_BROADCAST_FLOAT16X3(1.0 / 1023.0);                              // One 1/1023 step.
+     FfxFloat16x3 lo       = floor(sqrt(c) * FFX_BROADCAST_FLOAT16X3(1023.0)) * codeStep;        // Square root, rounded down.
+     FfxFloat16x3 hi       = lo + codeStep;                                                      // Next step up.
+     FfxFloat16x3 loSq     = lo * lo;
+     FfxFloat16x3 hiSq     = hi * hi;
+     FfxFloat16x3 ratio    = (c - hiSq) * ffxApproximateReciprocalMediumHalf(loSq - hiSq);       // 0 at hiSq, 1 at loSq.
+     c = ffxSaturate(lo + ffxIsGreaterThanZeroHalf(FFX_BROADCAST_FLOAT16X3(dit) - ratio) * codeStep);
+ }
+ //==============================================================================================================================
+ // This computes dither for positions 'p' and 'p+{8,0}'.
+ FfxFloat16x2 FsrTepdDitHx2(FfxUInt32x2 p, FfxUInt32 f)
+ {
+     FfxFloat32x2 x;
+     x.x     = FfxFloat32(p.x + f);     // Column of the first position, offset by 'f'.
+     x.y     = x.x + FfxFloat32(8.0);   // Column of the second position, 8 to the right.
+     FfxFloat32 y = FfxFloat32(p.y);    // Both positions share the same row.
+     FfxFloat32 a = FfxFloat32((1.0 + ffxSqrt(5.0f)) / 2.0); // Golden ratio (~1.61803).
+     FfxFloat32 b = FfxFloat32(1.0 / 3.69);                  // Row weight.
+     x       = x * ffxBroadcast2(a) + ffxBroadcast2(y * b);
+     return FfxFloat16x2(ffxFract(x)); // Fractional parts only, converted to 16-bit.
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC8Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit) // Each argument carries two values per channel; the same steps run on R, G and B.
+ {
+     FfxFloat16x2 nR = sqrt(cR); // Gamma 2.0 encode each channel.
+     FfxFloat16x2 nG = sqrt(cG);
+     FfxFloat16x2 nB = sqrt(cB);
+     nR     = floor(nR * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); // Round down to a multiple of 1/255.
+     nG     = floor(nG * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+     nB     = floor(nB * FFX_BROADCAST_FLOAT16X2(255.0)) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+     FfxFloat16x2 aR = nR * nR; // Squares of the lower steps.
+     FfxFloat16x2 aG = nG * nG;
+     FfxFloat16x2 aB = nB * nB;
+     FfxFloat16x2 bR = nR + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0); // Next step up, squared on the following line.
+     bR     = bR * bR;
+     FfxFloat16x2 bG = nG + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+     bG     = bG * bG;
+     FfxFloat16x2 bB = nB + FFX_BROADCAST_FLOAT16X2(1.0 / 255.0);
+     bB     = bB * bB;
+     FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(aR - bR); // 0 when c == b, 1 when c == a.
+     FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(aG - bG);
+     FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(aB - bB);
+     cR     = ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0)); // 'n', or one step above it where dit > r.
+     cG     = ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0));
+     cB     = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * FFX_BROADCAST_FLOAT16X2(1.0 / 255.0));
+ }
+ //------------------------------------------------------------------------------------------------------------------------------
+ void FsrTepdC10Hx2(inout FfxFloat16x2 cR, inout FfxFloat16x2 cG, inout FfxFloat16x2 cB, FfxFloat16x2 dit)
+ {
+     // One 1/1023 quantization step, shared by all channels.
+     FfxFloat16x2 codeStep = FFX_BROADCAST_FLOAT16X2(1.0 / 1023.0);
+     // Per channel: take the square root and round down to a multiple of the step.
+     FfxFloat16x2 nR = floor(sqrt(cR) * FFX_BROADCAST_FLOAT16X2(1023.0)) * codeStep;
+     FfxFloat16x2 nG = floor(sqrt(cG) * FFX_BROADCAST_FLOAT16X2(1023.0)) * codeStep;
+     FfxFloat16x2 nB = floor(sqrt(cB) * FFX_BROADCAST_FLOAT16X2(1023.0)) * codeStep;
+     // Square of the step directly above each rounded value.
+     FfxFloat16x2 bR = (nR + codeStep) * (nR + codeStep);
+     FfxFloat16x2 bG = (nG + codeStep) * (nG + codeStep);
+     FfxFloat16x2 bB = (nB + codeStep) * (nB + codeStep);
+     // Position of the input between the two squared steps (0 at the upper one, 1 at the lower one).
+     FfxFloat16x2 rR = (cR - bR) * ffxApproximateReciprocalMediumHalf(nR * nR - bR);
+     FfxFloat16x2 rG = (cG - bG) * ffxApproximateReciprocalMediumHalf(nG * nG - bG);
+     FfxFloat16x2 rB = (cB - bB) * ffxApproximateReciprocalMediumHalf(nB * nB - bB);
+     cR = ffxSaturate(nR + ffxIsGreaterThanZeroHalf(dit - rR) * codeStep);
+     cG = ffxSaturate(nG + ffxIsGreaterThanZeroHalf(dit - rG) * codeStep);
+     cB = ffxSaturate(nB + ffxIsGreaterThanZeroHalf(dit - rB) * codeStep);
+ }
+#endif
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h
new file mode 100644
index 0000000000..7bd5892cb9
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate.h
@@ -0,0 +1,295 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR2_ACCUMULATE_H
+#define FFX_FSR2_ACCUMULATE_H
+
+FfxFloat32 GetPxHrVelocity(FfxFloat32x2 fMotionVector) // Length of the motion vector after scaling it by DisplaySize().
+{
+    return length(fMotionVector * DisplaySize());
+}
+#if FFX_HALF
+FFX_MIN16_F GetPxHrVelocity(FFX_MIN16_F2 fMotionVector) // FFX_MIN16_F overload: DisplaySize() is converted to FFX_MIN16_F2 before scaling.
+{
+    return length(fMotionVector * FFX_MIN16_F2(DisplaySize()));
+}
+#endif
+
+void Accumulate(const AccumulationPassCommonParams params, FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor, FfxFloat32x3 fAccumulation, FFX_PARAMETER_IN FfxFloat32x4 fUpsampledColorAndWeight)
+{
+    // Avoid invalid values when both the accumulation and the upsampled weight are 0.
+    fAccumulation = ffxMax(FSR2_EPSILON.xxx, fAccumulation + fUpsampledColorAndWeight.www);
+
+#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
+    // YCoCg -> RGB -> Tonemap -> YCoCg (use the RGB tonemapper to avoid color desaturation).
+    fUpsampledColorAndWeight.xyz = RGBToYCoCg(Tonemap(YCoCgToRGB(fUpsampledColorAndWeight.xyz)));
+    fHistoryColor = RGBToYCoCg(Tonemap(YCoCgToRGB(fHistoryColor)));
+#endif
+    // Blend factor: this frame's weight as a fraction of the summed (epsilon-clamped) accumulation.
+    const FfxFloat32x3 fAlpha = fUpsampledColorAndWeight.www / fAccumulation;
+    fHistoryColor = ffxLerp(fHistoryColor, fUpsampledColorAndWeight.xyz, fAlpha);
+    // The blended result is returned as RGB (inverse-tonemapped for HDR input). `params` is not read here.
+    fHistoryColor = YCoCgToRGB(fHistoryColor);
+
+#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
+    fHistoryColor = InverseTonemap(fHistoryColor);
+#endif
+}
+
+void RectifyHistory(
+    const AccumulationPassCommonParams params,       // Reads fHrVelocity, fDepthClipFactor, fAccumulationMask, fDilatedReactiveFactor.
+    RectificationBox clippingBox,                    // Reads boxCenter, boxVec, aabbMin, aabbMax.
+    FFX_PARAMETER_INOUT FfxFloat32x3 fHistoryColor,  // Pulled towards the clipping box when it lies outside of it.
+    FFX_PARAMETER_INOUT FfxFloat32x3 fAccumulation,  // Pulled towards min(fAccumulation, 0.1) when the history is clipped.
+    FfxFloat32 fLockContributionThisFrame,           // Together with fLumaInstabilityFactor: how much unclipped history is kept.
+    FfxFloat32 fTemporalReactiveFactor,              // Not read by this function; kept so existing callers stay valid.
+    FfxFloat32 fLumaInstabilityFactor)
+{
+    FfxFloat32 fScaleFactorInfluence = ffxMin(20.0f, ffxPow(FfxFloat32(1.0f / length(DownscaleFactor().x * DownscaleFactor().y)), 3.0f));
+    // Depth clip, accumulation mask and velocity (saturating at fHrVelocity == 20) each pull the box scale towards 1.
+    const FfxFloat32 fVelocityFactor = ffxSaturate(params.fHrVelocity / 20.0f);
+    const FfxFloat32 fBoxScaleT = ffxMax(params.fDepthClipFactor, ffxMax(params.fAccumulationMask, fVelocityFactor));
+    FfxFloat32 fBoxScale = ffxLerp(fScaleFactorInfluence, 1.0f, fBoxScaleT);
+
+    FfxFloat32x3 fScaledBoxVec = clippingBox.boxVec * fBoxScale;
+    FfxFloat32x3 boxMin = clippingBox.boxCenter - fScaledBoxVec;
+    FfxFloat32x3 boxMax = clippingBox.boxCenter + fScaledBoxVec;
+
+    // The scaled box is intersected with the AABB stored in the clipping box,
+    // so it never extends beyond [aabbMin, aabbMax].
+    boxMin = ffxMax(clippingBox.aabbMin, boxMin);
+    boxMax = ffxMin(clippingBox.aabbMax, boxMax);
+
+    if (any(FFX_GREATER_THAN(boxMin, fHistoryColor)) || any(FFX_GREATER_THAN(fHistoryColor, boxMax))) {
+
+        const FfxFloat32x3 fClampedHistoryColor = clamp(fHistoryColor, boxMin, boxMax);
+
+        FfxFloat32x3 fHistoryContribution = ffxMax(fLumaInstabilityFactor, fLockContributionThisFrame).xxx;
+        // The larger the dilated reactive factor, the less unclipped history is kept.
+        const FfxFloat32 fReactiveFactor = params.fDilatedReactiveFactor;
+        const FfxFloat32 fReactiveContribution = 1.0f - ffxPow(fReactiveFactor, 1.0f / 2.0f);
+        fHistoryContribution *= fReactiveContribution;
+
+        // Scale history color using rectification info, also using accumulation mask to avoid potential invalid color protection
+        fHistoryColor = ffxLerp(fClampedHistoryColor, fHistoryColor, ffxSaturate(fHistoryContribution));
+
+        // Scale accumulation using rectification info
+        const FfxFloat32x3 fAccumulationMin = ffxMin(fAccumulation, FFX_BROADCAST_FLOAT32X3(0.1f));
+        fAccumulation = ffxLerp(fAccumulationMin, fAccumulation, ffxSaturate(fHistoryContribution));
+    }
+}
+
+void WriteUpscaledOutput(FfxInt32x2 iPxHrPos, FfxFloat32x3 fUpscaledColor) // Forwards the color for pixel iPxHrPos to StoreUpscaledOutput().
+{
+    StoreUpscaledOutput(iPxHrPos, fUpscaledColor);
+}
+
+void FinalizeLockStatus(const AccumulationPassCommonParams params, FfxFloat32x2 fLockStatus, FfxFloat32 fUpsampledWeight)
+{
+    // Estimate where this pixel ends up next frame, assuming the motion stays similar.
+    const FfxFloat32x2 fNextUv = params.fHrUv - params.fMotionVector;
+    if (IsUvInside(fNextUv)) {
+        // Still on screen: shorten the remaining lifetime by this frame's weight relative to the per-sequence maximum.
+        const FfxFloat32 fMaxWeight = FfxFloat32(JitterSequenceLength()) * FfxFloat32(fAverageLanczosWeightPerFrame);
+        const FfxFloat32 fAging = FfxFloat32(fUpsampledWeight / fMaxWeight);
+        const FfxFloat32 fRemaining = fLockStatus[LOCK_LIFETIME_REMAINING] - fAging;
+        fLockStatus[LOCK_LIFETIME_REMAINING] = ffxMax(FfxFloat32(0), fRemaining);
+    }
+    else {
+        // Leaving the screen: kill the lock so it does not end up clamped to the screen border.
+        KillLock(fLockStatus);
+    }
+    StoreLockStatus(params.iPxHrPos, fLockStatus);
+}
+
+
+FfxFloat32x3 ComputeBaseAccumulationWeight(const AccumulationPassCommonParams params, FfxFloat32 fThisFrameReactiveFactor, FfxBoolean bInMotionLastFrame, FfxFloat32 fUpsampledWeight, LockState lockState)
+{
+    // Always assume max accumulation was reached
+    FfxFloat32 fBaseAccumulation = fMaxAccumulationLanczosWeight * FfxFloat32(params.bIsExistingSample) * (1.0f - fThisFrameReactiveFactor) * (1.0f - params.fDepthClipFactor);
+    // Lower it towards 10x this frame's weight when the pixel moved last frame or fHrVelocity * 10 saturates.
+    fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight * 10.0f, ffxMax(FfxFloat32(bInMotionLastFrame), ffxSaturate(params.fHrVelocity * FfxFloat32(10)))));
+    // Lower it further towards this frame's weight as fHrVelocity approaches 20. `lockState` is not read here.
+    fBaseAccumulation = ffxMin(fBaseAccumulation, ffxLerp(fBaseAccumulation, fUpsampledWeight, ffxSaturate(params.fHrVelocity / FfxFloat32(20))));
+
+    return fBaseAccumulation.xxx;
+}
+
+FfxFloat32 ComputeLumaInstabilityFactor(const AccumulationPassCommonParams params, RectificationBox clippingBox, FfxFloat32 fThisFrameReactiveFactor, FfxFloat32 fLuminanceDiff)
+{
+    const FfxFloat32 fUnormThreshold = 1.0f / 255.0f;
+    const FfxInt32 N_MINUS_1 = 0; // Indices into the 4-entry luma history, newest first.
+    const FfxInt32 N_MINUS_2 = 1;
+    const FfxInt32 N_MINUS_3 = 2;
+    const FfxInt32 N_MINUS_4 = 3;
+    // The current luma is the x component of the clipping box center.
+    FfxFloat32 fCurrentFrameLuma = clippingBox.boxCenter.x;
+
+#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
+    fCurrentFrameLuma = fCurrentFrameLuma / (1.0f + ffxMax(0.0f, fCurrentFrameLuma));
+#endif
+    // Quantize to 1/255 steps.
+    fCurrentFrameLuma = round(fCurrentFrameLuma * 255.0f) / 255.0f;
+    // History is sampled only for non-new samples whose depth clip, accumulation mask and luminance difference are all below 0.1; otherwise it is all zeros.
+    const FfxBoolean bSampleLumaHistory = (ffxMax(ffxMax(params.fDepthClipFactor, params.fAccumulationMask), fLuminanceDiff) < 0.1f) && (params.bIsNewSample == false);
+    FfxFloat32x4 fCurrentFrameLumaHistory = bSampleLumaHistory ? SampleLumaHistory(params.fReprojectedHrUv) : FFX_BROADCAST_FLOAT32X4(0.0f);
+
+    FfxFloat32 fLumaInstability = 0.0f;
+    FfxFloat32 fDiffs0 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[N_MINUS_1]);
+
+    FfxFloat32 fMin = abs(fDiffs0);
+    // Only a difference of at least one 1/255 step against the newest history entry is examined further.
+    if (fMin >= fUnormThreshold)
+    {
+        for (int i = N_MINUS_2; i <= N_MINUS_4; i++) {
+            FfxFloat32 fDiffs1 = (fCurrentFrameLuma - fCurrentFrameLumaHistory[i]);
+            // Older entries count only when they differ from the current luma in the same direction as the newest one.
+            if (sign(fDiffs0) == sign(fDiffs1)) {
+                
+                // Scale difference to protect historically similar values
+                const FfxFloat32 fMinBias = 1.0f; // A bias of 1.0 leaves the difference unchanged.
+                fMin = ffxMin(fMin, abs(fDiffs1) * fMinBias);
+            }
+        }
+
+        const FfxFloat32 fBoxSize = clippingBox.boxVec.x;
+        const FfxFloat32 fBoxSizeFactor = ffxPow(ffxSaturate(fBoxSize / 0.1f), 6.0f);
+        // Becomes 1 when an older entry was closer to the current luma than the newest one and the box-size factor exceeds the threshold; 0 otherwise.
+        fLumaInstability = FfxFloat32(fMin != abs(fDiffs0)) * fBoxSizeFactor;
+        fLumaInstability = FfxFloat32(fLumaInstability > fUnormThreshold);
+        // Attenuate by the larger of the accumulation mask and the sixth root of the reactive factor.
+        fLumaInstability *= 1.0f - ffxMax(params.fAccumulationMask, ffxPow(fThisFrameReactiveFactor, 1.0f / 6.0f));
+    }
+
+    // Shift the history by one entry and insert the current luma as the newest.
+    fCurrentFrameLumaHistory[N_MINUS_4] = fCurrentFrameLumaHistory[N_MINUS_3];
+    fCurrentFrameLumaHistory[N_MINUS_3] = fCurrentFrameLumaHistory[N_MINUS_2];
+    fCurrentFrameLumaHistory[N_MINUS_2] = fCurrentFrameLumaHistory[N_MINUS_1];
+    fCurrentFrameLumaHistory[N_MINUS_1] = fCurrentFrameLuma;
+
+    StoreLumaHistory(params.iPxHrPos, fCurrentFrameLumaHistory);
+    // The result is zeroed while the oldest entry of the shifted history is still 0.
+    return fLumaInstability * FfxFloat32(fCurrentFrameLumaHistory[N_MINUS_4] != 0);
+}
+
+FfxFloat32 ComputeTemporalReactiveFactor(const AccumulationPassCommonParams params, FfxFloat32 fTemporalReactiveFactor)
+{
+    // Cap the incoming factor at 0.99, then raise it towards 0.4 as fHrVelocity grows.
+    FfxFloat32 fFactor = ffxMin(0.99f, fTemporalReactiveFactor);
+    fFactor = ffxMax(fFactor, ffxLerp(fFactor, 0.4f, ffxSaturate(params.fHrVelocity)));
+
+    // Square it, but never go below the larger of 10% depth clip and the dilated reactive factor.
+    const FfxFloat32 fFloor = ffxMax(params.fDepthClipFactor * 0.1f, params.fDilatedReactiveFactor);
+    fFactor = ffxMax(fFactor * fFactor, fFloor);
+    // Force reactive factor for new samples
+    if (params.bIsNewSample) {
+        fFactor = 1.0f;
+    }
+    // When fHrVelocity * 10 saturates, the factor is returned negated, with a magnitude of at least FSR2_EPSILON.
+    const FfxBoolean bFastMotion = ffxSaturate(params.fHrVelocity * 10.0f) >= 1.0f;
+    return bFastMotion ? ffxMax(FSR2_EPSILON, fFactor) * -1.0f : fFactor;
+}
+
+AccumulationPassCommonParams InitParams(FfxInt32x2 iPxHrPos) // Gathers the per-pixel inputs that the accumulation helpers read from `params`.
+{
+    AccumulationPassCommonParams params;
+    // UV of the pixel center, normalized by DisplaySize().
+    params.iPxHrPos = iPxHrPos;
+    const FfxFloat32x2 fHrUv = (iPxHrPos + 0.5f) / DisplaySize();
+    params.fHrUv = fHrUv;
+    // Jittered UV, passed through ClampUv() with RenderSize() and MaxRenderSize(); used for the samples below.
+    const FfxFloat32x2 fLrUvJittered = fHrUv + Jitter() / RenderSize();
+    params.fLrUv_HwSampler = ClampUv(fLrUvJittered, RenderSize(), MaxRenderSize());
+    // Motion vector for this pixel and its length after scaling by DisplaySize().
+    params.fMotionVector = GetMotionVector(iPxHrPos, fHrUv);
+    params.fHrVelocity = GetPxHrVelocity(params.fMotionVector);
+    // Fills fReprojectedHrUv and bIsExistingSample.
+    ComputeReprojectedUVs(params, params.fReprojectedHrUv, params.bIsExistingSample);
+
+    params.fDepthClipFactor = ffxSaturate(SampleDepthClip(params.fLrUv_HwSampler));
+    // .x is the dilated reactive factor, .y the accumulation mask.
+    const FfxFloat32x2 fDilatedReactiveMasks = SampleDilatedReactiveMasks(params.fLrUv_HwSampler);
+    params.fDilatedReactiveFactor = fDilatedReactiveMasks.x;
+    params.fAccumulationMask = fDilatedReactiveMasks.y;
+    params.bIsResetFrame = (0 == FrameIndex());
+    // A sample is new when reprojection found no existing sample or FrameIndex() is 0.
+    params.bIsNewSample = (params.bIsExistingSample == false || params.bIsResetFrame);
+
+    return params;
+}
+
+void Accumulate(FfxInt32x2 iPxHrPos) // Per-pixel entry point: runs the accumulation steps for output pixel iPxHrPos and stores their results.
+{
+    const AccumulationPassCommonParams params = InitParams(iPxHrPos);
+    // Defaults used when there is no history to reproject.
+    FfxFloat32x3 fHistoryColor = FfxFloat32x3(0, 0, 0);
+    FfxFloat32x2 fLockStatus;
+    InitializeNewLockSample(fLockStatus);
+
+    FfxFloat32 fTemporalReactiveFactor = 0.0f;
+    FfxBoolean bInMotionLastFrame = FFX_FALSE;
+    LockState lockState = { FFX_FALSE , FFX_FALSE };
+    if (params.bIsExistingSample && !params.bIsResetFrame) {
+        ReprojectHistoryColor(params, fHistoryColor, fTemporalReactiveFactor, bInMotionLastFrame);
+        lockState = ReprojectHistoryLockStatus(params, fLockStatus);
+    }
+    // The larger of this frame's dilated reactive factor and the reprojected temporal one.
+    FfxFloat32 fThisFrameReactiveFactor = ffxMax(params.fDilatedReactiveFactor, fTemporalReactiveFactor);
+
+    FfxFloat32 fLuminanceDiff = 0.0f;
+    FfxFloat32 fLockContributionThisFrame = 0.0f;
+    UpdateLockStatus(params, fThisFrameReactiveFactor, lockState, fLockStatus, fLockContributionThisFrame, fLuminanceDiff);
+
+    // Load the upsampled input color; the same call also fills clippingBox.
+    RectificationBox clippingBox;
+    FfxFloat32x4 fUpsampledColorAndWeight = ComputeUpsampledColorAndWeight(params, clippingBox, fThisFrameReactiveFactor);
+    
+    const FfxFloat32 fLumaInstabilityFactor = ComputeLumaInstabilityFactor(params, clippingBox, fThisFrameReactiveFactor, fLuminanceDiff);
+
+
+    FfxFloat32x3 fAccumulation = ComputeBaseAccumulationWeight(params, fThisFrameReactiveFactor, bInMotionLastFrame, fUpsampledColorAndWeight.w, lockState);
+    // New samples take the upsampled color directly; all others rectify the history and blend into it.
+    if (params.bIsNewSample) {
+        fHistoryColor = YCoCgToRGB(fUpsampledColorAndWeight.xyz);
+    }
+    else {
+        RectifyHistory(params, clippingBox, fHistoryColor, fAccumulation, fLockContributionThisFrame, fThisFrameReactiveFactor, fLumaInstabilityFactor);
+
+        Accumulate(params, fHistoryColor, fAccumulation, fUpsampledColorAndWeight);
+    }
+
+    fHistoryColor = UnprepareRgb(fHistoryColor, Exposure());
+
+    FinalizeLockStatus(params, fLockStatus, fUpsampledColorAndWeight.w);
+
+    // Get new temporal reactive factor
+    fTemporalReactiveFactor = ComputeTemporalReactiveFactor(params, fThisFrameReactiveFactor);
+    // The new temporal reactive factor is stored in the fourth component, next to the color.
+    StoreInternalColorAndWeight(iPxHrPos, FfxFloat32x4(fHistoryColor, fTemporalReactiveFactor));
+
+    // Output final color when RCAS is disabled
+#if FFX_FSR2_OPTION_APPLY_SHARPENING == 0
+    WriteUpscaledOutput(iPxHrPos, fHistoryColor);
+#endif
+    StoreNewLocks(iPxHrPos, 0);
+}
+
+#endif // FFX_FSR2_ACCUMULATE_H
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
new file mode 100644
index 0000000000..d2306fec4c
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_accumulate_pass.glsl
@@ -0,0 +1,92 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_samplerless_texture_functions : require
+// Needed for rw_upscaled_output declaration
+#extension GL_EXT_shader_image_load_formatted : require
+
+#define FSR2_BIND_SRV_INPUT_EXPOSURE                         0
+#define FSR2_BIND_SRV_DILATED_REACTIVE_MASKS                 1
+#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS                 2
+#else
+#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS                   2
+#endif
+#define FSR2_BIND_SRV_INTERNAL_UPSCALED                      3
+#define FSR2_BIND_SRV_LOCK_STATUS                            4
+#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR                   6
+#define FSR2_BIND_SRV_LUMA_INSTABILITY                       7
+#define FSR2_BIND_SRV_LANCZOS_LUT                            8
+#define FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT               9
+#define FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS                   10
+#define FSR2_BIND_SRV_AUTO_EXPOSURE                          11
+#define FSR2_BIND_SRV_LUMA_HISTORY                           12
+// UAV bindings.
+#define FSR2_BIND_UAV_INTERNAL_UPSCALED                      13
+#define FSR2_BIND_UAV_LOCK_STATUS                            14
+#define FSR2_BIND_UAV_UPSCALED_OUTPUT                        15
+#define FSR2_BIND_UAV_NEW_LOCKS                              16
+#define FSR2_BIND_UAV_LUMA_HISTORY                           17
+// Constant buffer binding.
+#define FSR2_BIND_CB_FSR2                                    18
+// Binding 5 is skipped in the SRV list above; only the optional depth input below uses it.
+// -- GODOT start --
+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
+#define FSR2_BIND_SRV_INPUT_DEPTH                            5
+#endif
+// -- GODOT end --
+
+#include "ffx_fsr2_callbacks_glsl.h"
+#include "ffx_fsr2_common.h"
+#include "ffx_fsr2_sample.h"
+#include "ffx_fsr2_upsample.h"
+#include "ffx_fsr2_postprocess_lock_status.h"
+#include "ffx_fsr2_reproject.h"
+#include "ffx_fsr2_accumulate.h"
+
+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR2_NUM_THREADS
+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
+// Each macro above is a default (8x8x1 group) that applies only when it was not already defined.
+#endif // #ifndef FFX_FSR2_NUM_THREADS
+
+FFX_FSR2_NUM_THREADS
+void main() // Accumulation pass entry point: one invocation per output pixel.
+{
+	uvec2 uGroupId = gl_WorkGroupID.xy;
+    const uint GroupRows = (uint(DisplaySize().y) + FFX_FSR2_THREAD_GROUP_HEIGHT - 1) / FFX_FSR2_THREAD_GROUP_HEIGHT; // Rows of groups covering DisplaySize().y, rounded up.
+    uGroupId.y = GroupRows - uGroupId.y - 1; // Reverse the vertical order in which workgroup rows map onto the image.
+    // Pixel coordinate = remapped group origin + position inside the group.
+    uvec2 uDispatchThreadId = uGroupId * uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + gl_LocalInvocationID.xy;
+
+    Accumulate(ivec2(uDispatchThreadId));
+}
+\ No newline at end of file
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
new file mode 100644
index 0000000000..e62b445924
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_autogen_reactive_pass.glsl
@@ -0,0 +1,93 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_samplerless_texture_functions : require
+
+#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY                     0
+#define FSR2_BIND_SRV_INPUT_COLOR                           1
+#define FSR2_BIND_UAV_AUTOREACTIVE                          2
+#define FSR2_BIND_CB_REACTIVE                               3 // Binding of the cbGenerateReactive uniform block declared in this file.
+#define FSR2_BIND_CB_FSR2                                   4 // Binding of the cbFSR2_t uniform block from ffx_fsr2_callbacks_glsl.h.
+
+#include "ffx_fsr2_callbacks_glsl.h"
+#include "ffx_fsr2_common.h"
+
+// layout (set = 1, binding = FSR2_BIND_SRV_PRE_ALPHA_COLOR)  uniform texture2D   r_input_color_pre_alpha;
+// layout (set = 1, binding = FSR2_BIND_SRV_POST_ALPHA_COLOR) uniform texture2D   r_input_color_post_alpha;
+// layout (set = 1, binding = FSR2_BIND_UAV_REACTIVE, r8)     uniform image2D     rw_output_reactive_mask;
+
+
+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR2_NUM_THREADS
+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
+#endif // #ifndef FFX_FSR2_NUM_THREADS
+
+#if defined(FSR2_BIND_CB_REACTIVE)
+layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t
+{
+	float   scale;        // Multiplier applied to the computed reactive value.
+	float   threshold;    // Cutoff used when the APPLY_THRESHOLD flag is set: values below it become 0.
+	float   binaryValue;  // Value written for pixels at or above the threshold.
+	uint    flags;        // Bitmask tested against the FFX_FSR2_AUTOREACTIVEFLAGS_* constants.
+} cbGenerateReactive;
+#endif
+
+FFX_FSR2_NUM_THREADS
+void main() // Auto-reactive pass entry point: writes one reactive value per pixel from the difference of the two input colors.
+{
+    FfxUInt32x2 uDispatchThreadId = gl_GlobalInvocationID.xy;
+
+    FfxFloat32x3 ColorPreAlpha  = LoadOpaqueOnly(FFX_MIN16_I2(uDispatchThreadId)).rgb;
+    FfxFloat32x3 ColorPostAlpha = LoadInputColor(FFX_MIN16_I2(uDispatchThreadId)).rgb;
+    // Optional color transforms; the two flags are tested independently, so both may run.
+    if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP) != 0)
+    {
+        ColorPreAlpha = Tonemap(ColorPreAlpha);
+        ColorPostAlpha = Tonemap(ColorPostAlpha);
+    }
+
+    if ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP) != 0)
+    {
+        ColorPreAlpha = InverseTonemap(ColorPreAlpha);
+        ColorPostAlpha = InverseTonemap(ColorPostAlpha);
+    }
+
+    FfxFloat32 out_reactive_value = 0.f;
+    FfxFloat32x3 delta = abs(ColorPostAlpha - ColorPreAlpha);
+    // Reduce the per-channel difference to one value (largest component or vector length), then scale it.
+    out_reactive_value = ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX)!=0) ? max(delta.x, max(delta.y, delta.z)) : length(delta);
+    out_reactive_value *= cbGenerateReactive.scale;
+    // With APPLY_THRESHOLD set the result becomes binary: 0 below the threshold, binaryValue otherwise.
+    out_reactive_value = ((cbGenerateReactive.flags & FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD)!=0) ? ((out_reactive_value < cbGenerateReactive.threshold) ? 0 : cbGenerateReactive.binaryValue) : out_reactive_value;
+
+    imageStore(rw_output_autoreactive, FfxInt32x2(uDispatchThreadId), vec4(out_reactive_value));
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
new file mode 100644
index 0000000000..45279bd357
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_glsl.h
@@ -0,0 +1,704 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+#include "ffx_fsr2_resources.h"
+
+#if defined(FFX_GPU)
+#include "ffx_core.h"
+#endif // #if defined(FFX_GPU)
+
+#if defined(FFX_GPU)
+#ifndef FFX_FSR2_PREFER_WAVE64
+#define FFX_FSR2_PREFER_WAVE64
+#endif // #ifndef FFX_FSR2_PREFER_WAVE64
+
+#if defined(FSR2_BIND_CB_FSR2)
+	layout (set = 1, binding = FSR2_BIND_CB_FSR2, std140) uniform cbFSR2_t // Shared constants; the accessor functions that follow (RenderSize(), Jitter(), ...) read them unconditionally.
+	{
+		FfxInt32x2    iRenderSize;
+		FfxInt32x2    iMaxRenderSize;
+		FfxInt32x2    iDisplaySize;
+		FfxInt32x2    iInputColorResourceDimensions;
+		FfxInt32x2    iLumaMipDimensions;
+		FfxInt32      iLumaMipLevelToUse;
+		FfxInt32      iFrameIndex;
+
+		FfxFloat32x4  fDeviceToViewDepth;
+		FfxFloat32x2  fJitter;
+		FfxFloat32x2  fMotionVectorScale;
+		FfxFloat32x2  fDownscaleFactor;
+		FfxFloat32x2  fMotionVectorJitterCancellation;
+		FfxFloat32    fPreExposure;
+		FfxFloat32    fPreviousFramePreExposure;
+		FfxFloat32    fTanHalfFOV;
+		FfxFloat32    fJitterSequenceLength;
+		FfxFloat32    fDeltaTime;
+		FfxFloat32    fDynamicResChangeFactor;
+		FfxFloat32    fViewSpaceToMetersFactor;
+		
+		// -- GODOT start --
+		FfxFloat32    fPad; // Explicit 4-byte pad: assuming 32-bit Ffx* types, the members above end at byte 124, so this rounds up to 128, the 16-byte-aligned offset std140 gives the mat4.
+		mat4          mReprojectionMatrix; // Passed to FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION in LoadInputMotionVector().
+		// -- GODOT end --
+	} cbFSR2;
+#endif
+
+FfxInt32x2 RenderSize()
+{
+	return cbFSR2.iRenderSize;
+}
+
+FfxInt32x2 MaxRenderSize()
+{
+	return cbFSR2.iMaxRenderSize;
+}
+
+FfxInt32x2 DisplaySize()
+{
+	return cbFSR2.iDisplaySize;
+}
+
+FfxInt32x2 InputColorResourceDimensions()
+{
+	return cbFSR2.iInputColorResourceDimensions;
+}
+
+FfxInt32x2 LumaMipDimensions()
+{
+	return cbFSR2.iLumaMipDimensions;
+}
+
+FfxInt32  LumaMipLevelToUse()
+{
+	return cbFSR2.iLumaMipLevelToUse;
+}
+
+FfxInt32 FrameIndex()
+{
+	return cbFSR2.iFrameIndex;
+}
+
+FfxFloat32x4 DeviceToViewSpaceTransformFactors()
+{
+	return cbFSR2.fDeviceToViewDepth;
+}
+
+FfxFloat32x2 Jitter()
+{
+	return cbFSR2.fJitter;
+}
+
+FfxFloat32x2 MotionVectorScale()
+{
+	return cbFSR2.fMotionVectorScale;
+}
+
+FfxFloat32x2 DownscaleFactor()
+{
+	return cbFSR2.fDownscaleFactor;
+}
+
+FfxFloat32x2 MotionVectorJitterCancellation()
+{
+	return cbFSR2.fMotionVectorJitterCancellation;
+}
+
+FfxFloat32 PreExposure()
+{
+	return cbFSR2.fPreExposure;
+}
+
+FfxFloat32 PreviousFramePreExposure()
+{
+	return cbFSR2.fPreviousFramePreExposure;
+}
+
+FfxFloat32 TanHalfFoV()
+{
+	return cbFSR2.fTanHalfFOV;
+}
+
+FfxFloat32 JitterSequenceLength()
+{
+	return cbFSR2.fJitterSequenceLength;
+}
+
+FfxFloat32 DeltaTime()
+{
+	return cbFSR2.fDeltaTime;
+}
+
+FfxFloat32 DynamicResChangeFactor()
+{
+	return cbFSR2.fDynamicResChangeFactor;
+}
+
+FfxFloat32 ViewSpaceToMetersFactor()
+{
+	return cbFSR2.fViewSpaceToMetersFactor;
+}
+
+layout (set = 0, binding = 0) uniform sampler s_PointClamp;
+layout (set = 0, binding = 1) uniform sampler s_LinearClamp;
+
+// SRVs
+#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
+	layout (set = 1, binding = FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)                       uniform texture2D  r_input_opaque_only;
+#endif
+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
+	layout (set = 1, binding = FSR2_BIND_SRV_INPUT_COLOR)                             uniform texture2D  r_input_color_jittered;
+#endif
+#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
+	layout (set = 1, binding = FSR2_BIND_SRV_INPUT_MOTION_VECTORS)                    uniform texture2D  r_input_motion_vectors;
+#endif
+#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
+	layout (set = 1, binding = FSR2_BIND_SRV_INPUT_DEPTH)                             uniform texture2D  r_input_depth;
+#endif
+#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
+	layout (set = 1, binding = FSR2_BIND_SRV_INPUT_EXPOSURE)                          uniform texture2D  r_input_exposure;
+#endif
+#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
+	layout(set = 1, binding = FSR2_BIND_SRV_AUTO_EXPOSURE)                            uniform texture2D  r_auto_exposure;
+#endif
+#if defined(FSR2_BIND_SRV_REACTIVE_MASK)
+	layout (set = 1, binding = FSR2_BIND_SRV_REACTIVE_MASK)                           uniform texture2D  r_reactive_mask;
+#endif
+#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
+	layout (set = 1, binding = FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)       uniform texture2D  r_transparency_and_composition_mask;
+#endif
+#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+	layout (set = 1, binding = FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)        uniform utexture2D r_reconstructed_previous_nearest_depth;
+#endif
+#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
+	layout (set = 1, binding = FSR2_BIND_SRV_DILATED_MOTION_VECTORS)                  uniform texture2D  r_dilated_motion_vectors;
+#endif
+#if defined (FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
+	layout(set = 1, binding = FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)          uniform texture2D  r_previous_dilated_motion_vectors;
+#endif
+#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
+	layout (set = 1, binding = FSR2_BIND_SRV_DILATED_DEPTH)                           uniform texture2D  r_dilatedDepth;
+#endif
+#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
+	layout (set = 1, binding = FSR2_BIND_SRV_INTERNAL_UPSCALED)                       uniform texture2D  r_internal_upscaled_color;
+#endif
+#if defined(FSR2_BIND_SRV_LOCK_STATUS)
+	layout (set = 1, binding = FSR2_BIND_SRV_LOCK_STATUS)                             uniform texture2D  r_lock_status;
+#endif
+#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
+	layout (set = 1, binding = FSR2_BIND_SRV_LOCK_INPUT_LUMA)                         uniform texture2D  r_lock_input_luma;
+#endif
+#if defined(FSR2_BIND_SRV_NEW_LOCKS)
+	layout(set = 1, binding = FSR2_BIND_SRV_NEW_LOCKS)                                uniform texture2D  r_new_locks;
+#endif
+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
+	layout (set = 1, binding = FSR2_BIND_SRV_PREPARED_INPUT_COLOR)                    uniform texture2D  r_prepared_input_color;
+#endif
+#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
+	layout (set = 1, binding = FSR2_BIND_SRV_LUMA_HISTORY)                            uniform texture2D  r_luma_history;
+#endif
+#if defined(FSR2_BIND_SRV_RCAS_INPUT)
+	layout (set = 1, binding = FSR2_BIND_SRV_RCAS_INPUT)                              uniform texture2D  r_rcas_input;
+#endif
+#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
+	layout (set = 1, binding = FSR2_BIND_SRV_LANCZOS_LUT)                             uniform texture2D  r_lanczos_lut;
+#endif
+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
+	layout (set = 1, binding = FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)                    uniform texture2D  r_imgMips;
+#endif
+#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
+	layout (set = 1, binding = FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)                uniform texture2D  r_upsample_maximum_bias_lut;
+#endif
+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
+	layout (set = 1, binding = FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)                  uniform texture2D  r_dilated_reactive_masks;
+#endif			 
+#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
+	layout(set = 1, binding = FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) 				      uniform texture2D  r_input_prev_color_pre_alpha;
+#endif
+#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
+	layout(set = 1, binding = FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) 				      uniform texture2D  r_input_prev_color_post_alpha;
+#endif
+
+// UAV
+#if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
+	layout (set = 1, binding = FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH, r32ui) uniform uimage2D   rw_reconstructed_previous_nearest_depth;
+#endif
+#if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
+	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_MOTION_VECTORS, rg16f)           writeonly uniform image2D  rw_dilated_motion_vectors;
+#endif
+#if defined FSR2_BIND_UAV_DILATED_DEPTH
+	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_DEPTH, r16f)                     writeonly uniform image2D  rw_dilatedDepth;
+#endif
+#if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
+	layout (set = 1, binding = FSR2_BIND_UAV_INTERNAL_UPSCALED, rgba16f)              writeonly uniform image2D  rw_internal_upscaled_color;
+#endif
+#if defined FSR2_BIND_UAV_LOCK_STATUS
+	layout (set = 1, binding = FSR2_BIND_UAV_LOCK_STATUS, rg16f)                      uniform image2D    rw_lock_status;
+#endif
+#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
+	layout(set = 1, binding = FSR2_BIND_UAV_LOCK_INPUT_LUMA, r16f)                    writeonly uniform image2D    rw_lock_input_luma;
+#endif
+#if defined FSR2_BIND_UAV_NEW_LOCKS
+	layout(set = 1, binding = FSR2_BIND_UAV_NEW_LOCKS, r8)				 		      uniform image2D    rw_new_locks;
+#endif
+#if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
+	layout (set = 1, binding = FSR2_BIND_UAV_PREPARED_INPUT_COLOR, rgba16)            writeonly uniform image2D  rw_prepared_input_color;
+#endif
+#if defined FSR2_BIND_UAV_LUMA_HISTORY
+	layout (set = 1, binding = FSR2_BIND_UAV_LUMA_HISTORY, rgba8)                     uniform image2D  rw_luma_history;
+#endif
+#if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
+	layout (set = 1, binding = FSR2_BIND_UAV_UPSCALED_OUTPUT /* app controlled format */) writeonly uniform image2D  rw_upscaled_output;
+#endif
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+	layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE, r16f)              coherent uniform image2D  rw_img_mip_shading_change;
+#endif
+#if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+	layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE_MIP_5, r16f)                        coherent uniform image2D  rw_img_mip_5;
+#endif
+#if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
+	layout (set = 1, binding = FSR2_BIND_UAV_DILATED_REACTIVE_MASKS, rg8)                 writeonly uniform image2D	 rw_dilated_reactive_masks;
+#endif 
+#if defined FSR2_BIND_UAV_EXPOSURE 
+	layout (set = 1, binding = FSR2_BIND_UAV_EXPOSURE, rg32f)                         uniform image2D    rw_exposure;
+#endif
+#if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+	layout(set = 1, binding = FSR2_BIND_UAV_AUTO_EXPOSURE, rg32f)                         uniform image2D    rw_auto_exposure;
+#endif
+#if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC 
+	layout (set = 1, binding = FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC, r32ui)       coherent uniform uimage2D   rw_spd_global_atomic;
+#endif
+
+#if defined FSR2_BIND_UAV_AUTOREACTIVE
+	layout(set = 1, binding = FSR2_BIND_UAV_AUTOREACTIVE, r32f)                       uniform image2D   	    rw_output_autoreactive;
+#endif
+#if defined FSR2_BIND_UAV_AUTOCOMPOSITION
+	layout(set = 1, binding = FSR2_BIND_UAV_AUTOCOMPOSITION, r32f)                    uniform image2D   	    rw_output_autocomposition;
+#endif
+#if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR
+	layout(set = 1, binding = FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR, r11f_g11f_b10f)     uniform image2D   	    rw_output_prev_color_pre_alpha;
+#endif
+#if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
+	layout(set = 1, binding = FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR, r11f_g11f_b10f)    uniform image2D   	    rw_output_prev_color_post_alpha;
+#endif
+
+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
+FfxFloat32 LoadMipLuma(FfxInt32x2 iPxPos, FfxInt32 mipLevel)
+{
+	return texelFetch(r_imgMips, iPxPos, FfxInt32(mipLevel)).r;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS)
+FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxInt32 mipLevel)
+{
+	return textureLod(sampler2D(r_imgMips, s_LinearClamp), fUV, FfxFloat32(mipLevel)).r;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_DEPTH)
+FfxFloat32 LoadInputDepth(FfxInt32x2 iPxPos)
+{
+	return texelFetch(r_input_depth, iPxPos, 0).r;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_REACTIVE_MASK) // Only compiled when the reactive mask texture is bound.
+FfxFloat32 LoadReactiveMask(FfxInt32x2 iPxPos) // Returns the red channel of r_reactive_mask at texel iPxPos (mip 0).
+{
+// -- GODOT start --
+#if FFX_FSR2_OPTION_GODOT_REACTIVE_MASK_CLAMP
+	return min(texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r, 0.9f); // Option enabled: the fetched value is capped at 0.9.
+#else
+	return texelFetch(r_reactive_mask, FfxInt32x2(iPxPos), 0).r; // Option disabled: the value is returned as fetched.
+#endif
+// -- GODOT end --
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK)
+FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
+{
+	return texelFetch(r_transparency_and_composition_mask, FfxInt32x2(iPxPos), 0).r;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
+FfxFloat32x3 LoadInputColor(FfxInt32x2 iPxPos)
+{
+	return texelFetch(r_input_color_jittered, iPxPos, 0).rgb;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_COLOR)
+FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
+{
+	return textureLod(sampler2D(r_input_color_jittered, s_LinearClamp), fUV, 0.0f).rgb;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
+FfxFloat32x3 LoadPreparedInputColor(FfxInt32x2 iPxPos)
+{
+	return texelFetch(r_prepared_input_color, iPxPos, 0).xyz;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS)
+FfxFloat32x2 LoadInputMotionVector(FfxInt32x2 iPxDilatedMotionVectorPos) // Returns the motion vector at the given texel, multiplied by MotionVectorScale().
+{
+	FfxFloat32x2 fSrcMotionVector = texelFetch(r_input_motion_vectors, iPxDilatedMotionVectorPos, 0).xy;
+
+// -- GODOT start --
+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
+	bool bInvalidMotionVector = all(lessThanEqual(fSrcMotionVector, vec2(-1.0f, -1.0f))); // A vector is treated as invalid when both components are <= -1.
+	if (bInvalidMotionVector)
+	{
+		FfxFloat32 fSrcDepth = LoadInputDepth(iPxDilatedMotionVectorPos); // LoadInputDepth() exists only when FSR2_BIND_SRV_INPUT_DEPTH is defined.
+		FfxFloat32x2 fUv = (iPxDilatedMotionVectorPos + FfxFloat32(0.5)) / RenderSize(); // Texel center as a fraction of the render size.
+		fSrcMotionVector = FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS_FUNCTION(fUv, fSrcDepth, cbFSR2.mReprojectionMatrix); // The replacement vector comes from a macro that this header does not define.
+	}
+#endif
+// -- GODOT end --
+
+	FfxFloat32x2 fUvMotionVector = fSrcMotionVector * MotionVectorScale();
+
+#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
+	fUvMotionVector -= MotionVectorJitterCancellation(); // Subtracted after scaling, and only when the option is enabled.
+#endif
+
+	return fUvMotionVector;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED)
+FfxFloat32x4 LoadHistory(FfxInt32x2 iPxHistory)
+{
+	return texelFetch(r_internal_upscaled_color, iPxHistory, 0);
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_LUMA_HISTORY)
+void StoreLumaHistory(FfxInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
+{
+	imageStore(rw_luma_history, FfxInt32x2(iPxPos), fLumaHistory);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_LUMA_HISTORY)
+FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
+{
+	return textureLod(sampler2D(r_luma_history, s_LinearClamp), fUV, 0.0f);
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
+void StoreReprojectedHistory(FfxInt32x2 iPxHistory, FfxFloat32x4 fHistory)
+{
+	imageStore(rw_internal_upscaled_color, iPxHistory, fHistory);
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED)
+void StoreInternalColorAndWeight(FfxInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
+{
+	imageStore(rw_internal_upscaled_color, FfxInt32x2(iPxPos), fColorAndWeight);
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT)
+void StoreUpscaledOutput(FfxInt32x2 iPxPos, FfxFloat32x3 fColor)
+{
+    imageStore(rw_upscaled_output, FfxInt32x2(iPxPos), FfxFloat32x4(fColor, 1.f));
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_LOCK_STATUS)
+FfxFloat32x2 LoadLockStatus(FfxInt32x2 iPxPos)
+{
+	FfxFloat32x2 fLockStatus = texelFetch(r_lock_status, iPxPos, 0).rg;
+
+    return fLockStatus;
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_LOCK_STATUS)
+void StoreLockStatus(FfxInt32x2 iPxPos, FfxFloat32x2 fLockstatus)
+{
+	imageStore(rw_lock_status, iPxPos, vec4(fLockstatus, 0.0f, 0.0f));
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA)
+FfxFloat32 LoadLockInputLuma(FfxInt32x2 iPxPos)
+{
+	return texelFetch(r_lock_input_luma, iPxPos, 0).r;
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA)
+void StoreLockInputLuma(FfxInt32x2 iPxPos, FfxFloat32 fLuma)
+{
+	imageStore(rw_lock_input_luma, iPxPos, vec4(fLuma, 0, 0, 0));
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_NEW_LOCKS)
+FfxFloat32 LoadNewLocks(FfxInt32x2 iPxPos)
+{
+	return texelFetch(r_new_locks, iPxPos, 0).r;
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_NEW_LOCKS)
+FfxFloat32 LoadRwNewLocks(FfxInt32x2 iPxPos)
+{
+	return imageLoad(rw_new_locks, iPxPos).r;
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_NEW_LOCKS)
+void StoreNewLocks(FfxInt32x2 iPxPos, FfxFloat32 newLock)
+{
+	imageStore(rw_new_locks, iPxPos, vec4(newLock, 0, 0, 0));
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR)
+void StorePreparedInputColor(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped)
+{
+	imageStore(rw_prepared_input_color, iPxPos, fTonemapped);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR)
+FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV)
+{
+	return textureLod(sampler2D(r_prepared_input_color, s_LinearClamp), fUV, 0.0f).w;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_LOCK_STATUS)
+FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
+{
+	FfxFloat32x2 fLockStatus = textureLod(sampler2D(r_lock_status, s_LinearClamp), fUV, 0.0f).rg;
+	return fLockStatus;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_DEPTH) // Same guard as the r_input_depth declaration this function reads.
+FfxFloat32 LoadSceneDepth(FfxInt32x2 iPxInput) // Returns the red channel of r_input_depth at texel iPxInput (mip 0).
+{
+	return texelFetch(r_input_depth, iPxInput, 0).r;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+FfxFloat32 LoadReconstructedPrevDepth(FfxInt32x2 iPxPos)
+{
+	return uintBitsToFloat(texelFetch(r_reconstructed_previous_nearest_depth, iPxPos, 0).r);
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+void StoreReconstructedDepth(FfxInt32x2 iPxSample, FfxFloat32 fDepth) // Atomically merges fDepth into the r32ui depth image at iPxSample.
+{
+	FfxUInt32 uDepth = floatBitsToUint(fDepth); // Raw float bits, since the image is unsigned-integer. NOTE(review): integer ordering matches float ordering only for non-negative values; presumably depth is never negative - confirm.
+
+	#if FFX_FSR2_OPTION_INVERTED_DEPTH
+		imageAtomicMax(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth);
+	#else
+		imageAtomicMin(rw_reconstructed_previous_nearest_depth, iPxSample, uDepth); // min for standard, max for inverted depth
+	#endif
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH)
+void SetReconstructedDepth(FfxInt32x2 iPxSample, FfxUInt32 uValue)
+{
+	imageStore(rw_reconstructed_previous_nearest_depth, iPxSample, uvec4(uValue, 0, 0, 0));
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_DILATED_DEPTH)
+void StoreDilatedDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
+{
+	//FfxUInt32 uDepth = f32tof16(fDepth);
+	imageStore(rw_dilatedDepth, iPxPos, vec4(fDepth, 0.0f, 0.0f, 0.0f));
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) 
+void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
+{
+	imageStore(rw_dilated_motion_vectors, iPxPos, vec4(fMotionVector, 0.0f, 0.0f));
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
+FfxFloat32x2 LoadDilatedMotionVector(FfxInt32x2 iPxInput)
+{
+	return texelFetch(r_dilated_motion_vectors, iPxInput, 0).rg;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS)
+FfxFloat32x2 SampleDilatedMotionVector(FfxFloat32x2 fUV)
+{
+	return textureLod(sampler2D(r_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).rg;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS)
+FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxInt32x2 iPxInput)
+{
+	return texelFetch(r_previous_dilated_motion_vectors, iPxInput, 0).rg;
+}
+
+FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 fUV)
+{
+	return textureLod(sampler2D(r_previous_dilated_motion_vectors, s_LinearClamp), fUV, 0.0f).xy;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_DEPTH)
+FfxFloat32 LoadDilatedDepth(FfxInt32x2 iPxInput)
+{
+	return texelFetch(r_dilatedDepth, iPxInput, 0).r;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE)
+FfxFloat32 Exposure() // Reads the x component of texel (0, 0) of r_input_exposure.
+{
+	FfxFloat32 exposure = texelFetch(r_input_exposure, FfxInt32x2(0, 0), 0).x;
+
+	if (exposure == 0.0f) { // An exact zero is replaced by 1.0, so the function never returns zero.
+		exposure = 1.0f;
+	}
+
+	return exposure;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE)
+FfxFloat32 AutoExposure() // Reads the x component of texel (0, 0) of r_auto_exposure.
+{
+	FfxFloat32 exposure = texelFetch(r_auto_exposure, FfxInt32x2(0, 0), 0).x;
+
+	if (exposure == 0.0f) { // An exact zero is replaced by 1.0, so the function never returns zero.
+		exposure = 1.0f;
+	}
+
+	return exposure;
+}
+#endif
+
+FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) // Looks up a weight for x in the Lanczos LUT texture; defined even when the LUT is not bound.
+{
+#if defined(FSR2_BIND_SRV_LANCZOS_LUT)
+	return textureLod(sampler2D(r_lanczos_lut, s_LinearClamp), FfxFloat32x2(x / 2.0f, 0.5f), 0.0f).x; // x is halved to form the U coordinate; V is fixed at 0.5.
+#else
+    return 0.f; // LUT not bound: every lookup yields zero.
+#endif
+}
+
+#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT)
+FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
+{
+    // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range.
+    return FfxFloat32(2.0f) * FfxFloat32(textureLod(sampler2D(r_upsample_maximum_bias_lut, s_LinearClamp), abs(uv) * 2.0f, 0.0f).r);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
+FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
+{
+	return textureLod(sampler2D(r_dilated_reactive_masks, s_LinearClamp), fUV, 0.0f).rg;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS)
+FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos)
+{
+    return texelFetch(r_dilated_reactive_masks, iPxPos, 0).rg;
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS)
+void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks)
+{
+    imageStore(rw_dilated_reactive_masks, iPxPos, vec4(fDilatedReactiveMasks, 0.0f, 0.0f));
+}
+#endif
+
+#if defined(FFX_INTERNAL) // NOTE(review): r_debug_out has no declaration in this header; confirm an FFX_INTERNAL build provides it elsewhere.
+FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV) // Samples r_debug_out at fUV (LOD 0) through s_LinearClamp and returns all four channels.
+{
+    return textureLod(sampler2D(r_debug_out, s_LinearClamp), fUV, 0.0f).rgba;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY)
+FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
+{
+	return texelFetch(r_input_opaque_only, iPxPos, 0).xyz;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR)
+FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
+{
+	return texelFetch(r_input_prev_color_pre_alpha, iPxPos, 0).xyz;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR)
+FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
+{
+	return texelFetch(r_input_prev_color_post_alpha, iPxPos, 0).xyz;
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_AUTOREACTIVE)
+#if defined(FSR2_BIND_UAV_AUTOCOMPOSITION) // Both output images must be bound for this function to exist.
+void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive) // Splits fReactive across two r32f images at texel iPxPos.
+{
+	imageStore(rw_output_autoreactive, iPxPos, vec4(FfxFloat32(fReactive.x), 0.0f, 0.0f, 0.0f)); // x component -> rw_output_autoreactive
+
+	imageStore(rw_output_autocomposition, iPxPos, vec4(FfxFloat32(fReactive.y), 0.0f, 0.0f, 0.0f)); // y component -> rw_output_autocomposition
+}
+#endif
+#endif
+
+#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR)
+void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
+{
+	imageStore(rw_output_prev_color_pre_alpha, iPxPos, vec4(color, 0.0f));
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR)
+void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
+{
+	imageStore(rw_output_prev_color_post_alpha, iPxPos, vec4(color, 0.0f));
+}
+#endif
+
+#endif // #if defined(FFX_GPU)
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h
new file mode 100644
index 0000000000..fd722b307e
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_callbacks_hlsl.h
@@ -0,0 +1,799 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#include "ffx_fsr2_resources.h"
+
+#if defined(FFX_GPU)
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic push
+#pragma dxc diagnostic ignored "-Wambig-lit-shift"
+#endif //__hlsl_dx_compiler
+#include "ffx_core.h"
+#ifdef __hlsl_dx_compiler
+#pragma dxc diagnostic pop
+#endif //__hlsl_dx_compiler
+#endif // #if defined(FFX_GPU)
+
+#if defined(FFX_GPU)
+#ifndef FFX_FSR2_PREFER_WAVE64
+#define FFX_FSR2_PREFER_WAVE64
+#endif // #ifndef FFX_FSR2_PREFER_WAVE64
+
+#if defined(FFX_GPU)
+#pragma warning(disable: 3205)  // conversion from larger type to smaller
+#endif // #if defined(FFX_GPU)
+
+#define DECLARE_SRV_REGISTER(regIndex)  t##regIndex
+#define DECLARE_UAV_REGISTER(regIndex)  u##regIndex
+#define DECLARE_CB_REGISTER(regIndex)   b##regIndex
+#define FFX_FSR2_DECLARE_SRV(regIndex)  register(DECLARE_SRV_REGISTER(regIndex))
+#define FFX_FSR2_DECLARE_UAV(regIndex)  register(DECLARE_UAV_REGISTER(regIndex))
+#define FFX_FSR2_DECLARE_CB(regIndex)   register(DECLARE_CB_REGISTER(regIndex))
+
+#if defined(FSR2_BIND_CB_FSR2) || defined(FFX_INTERNAL)
+    cbuffer cbFSR2 : FFX_FSR2_DECLARE_CB(FSR2_BIND_CB_FSR2) // Shared constants; the getter functions that follow return these members directly.
+    {
+        FfxInt32x2    iRenderSize;
+        FfxInt32x2    iMaxRenderSize;
+        FfxInt32x2    iDisplaySize;
+        FfxInt32x2    iInputColorResourceDimensions;
+        FfxInt32x2    iLumaMipDimensions;
+        FfxInt32      iLumaMipLevelToUse;
+        FfxInt32      iFrameIndex;
+
+        FfxFloat32x4  fDeviceToViewDepth;
+        FfxFloat32x2  fJitter;
+        FfxFloat32x2  fMotionVectorScale;
+        FfxFloat32x2  fDownscaleFactor;
+        FfxFloat32x2  fMotionVectorJitterCancellation;
+        FfxFloat32    fPreExposure;
+        FfxFloat32    fPreviousFramePreExposure;
+        FfxFloat32    fTanHalfFOV;
+        FfxFloat32    fJitterSequenceLength;
+        FfxFloat32    fDeltaTime;
+        FfxFloat32    fDynamicResChangeFactor;
+        FfxFloat32    fViewSpaceToMetersFactor;
+    }; // NOTE(review): the GLSL cbFSR2_t block in ffx_fsr2_callbacks_glsl.h additionally ends with fPad and mReprojectionMatrix; this cbuffer does not.
+
+#define FFX_FSR2_CONSTANT_BUFFER_1_SIZE (sizeof(cbFSR2) / 4)  // Number of 32-bit values. This must be kept in sync with the cbFSR2 size. NOTE(review): FFX_FSR2_ROOTSIG_STRINGIFY turns this expression into text rather than a number - confirm the root-signature string accepts that.
+#endif
+
+#if defined(FFX_GPU)
+#define FFX_FSR2_ROOTSIG_STRINGIFY(p) FFX_FSR2_ROOTSIG_STR(p)
+#define FFX_FSR2_ROOTSIG_STR(p) #p
+#define FFX_FSR2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
+                                    "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "comparisonFunc = COMPARISON_NEVER, " \
+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+                                    "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "comparisonFunc = COMPARISON_NEVER, " \
+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+
+#define FFX_FSR2_CONSTANT_BUFFER_2_SIZE 6  // Number of 32-bit values. This must be kept in sync with max( cbRCAS , cbSPD) size.
+
+#define FFX_FSR2_CB2_ROOTSIG [RootSignature( "DescriptorTable(UAV(u0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "DescriptorTable(SRV(t0, numDescriptors = " FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_RESOURCE_IDENTIFIER_COUNT) ")), " \
+                                    "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_1_SIZE) ", b0), " \
+                                    "RootConstants(num32BitConstants=" FFX_FSR2_ROOTSIG_STRINGIFY(FFX_FSR2_CONSTANT_BUFFER_2_SIZE) ", b1), " \
+                                    "StaticSampler(s0, filter = FILTER_MIN_MAG_MIP_POINT, " \
+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "comparisonFunc = COMPARISON_NEVER, " \
+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK), " \
+                                    "StaticSampler(s1, filter = FILTER_MIN_MAG_MIP_LINEAR, " \
+                                                      "addressU = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressV = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "addressW = TEXTURE_ADDRESS_CLAMP, " \
+                                                      "comparisonFunc = COMPARISON_NEVER, " \
+                                                      "borderColor = STATIC_BORDER_COLOR_TRANSPARENT_BLACK)" )]
+#if defined(FFX_FSR2_EMBED_ROOTSIG)
+#define FFX_FSR2_EMBED_ROOTSIG_CONTENT FFX_FSR2_ROOTSIG
+#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT FFX_FSR2_CB2_ROOTSIG
+#else
+#define FFX_FSR2_EMBED_ROOTSIG_CONTENT
+#define FFX_FSR2_EMBED_CB2_ROOTSIG_CONTENT
+#endif // #if FFX_FSR2_EMBED_ROOTSIG
+#endif // #if defined(FFX_GPU)
+
+/* Define getter functions in the order they are defined in the CB! */
+FfxInt32x2 RenderSize()
+{
+    return iRenderSize;
+}
+
+FfxInt32x2 MaxRenderSize()
+{
+    return iMaxRenderSize;
+}
+
+FfxInt32x2 DisplaySize()
+{
+    return iDisplaySize;
+}
+
+FfxInt32x2 InputColorResourceDimensions()
+{
+    return iInputColorResourceDimensions;
+}
+
+FfxInt32x2 LumaMipDimensions()
+{
+    return iLumaMipDimensions;
+}
+
+FfxInt32  LumaMipLevelToUse()
+{
+    return iLumaMipLevelToUse;
+}
+
+FfxInt32 FrameIndex()
+{
+    return iFrameIndex;
+}
+
+FfxFloat32x2 Jitter()
+{
+    return fJitter;
+}
+
+FfxFloat32x4 DeviceToViewSpaceTransformFactors()
+{
+    return fDeviceToViewDepth; // note: the backing constant's name (fDeviceToViewDepth) differs from the getter's name
+}
+
+FfxFloat32x2 MotionVectorScale()
+{
+    return fMotionVectorScale; // constant-buffer member returned unmodified; LoadInputMotionVector() multiplies raw vectors by it
+}
+
+FfxFloat32x2 DownscaleFactor()
+{
+    return fDownscaleFactor; // constant-buffer member returned unmodified
+}
+
+FfxFloat32x2 MotionVectorJitterCancellation()
+{
+    return fMotionVectorJitterCancellation; // constant-buffer member returned unmodified; subtracted in LoadInputMotionVector() when jittered vectors are enabled
+}
+
+FfxFloat32 PreExposure()
+{
+    return fPreExposure; // constant-buffer member returned unmodified
+}
+
+FfxFloat32 PreviousFramePreExposure()
+{
+    return fPreviousFramePreExposure; // constant-buffer member returned unmodified
+}
+
+FfxFloat32 TanHalfFoV()
+{
+    return fTanHalfFOV; // note the casing: getter is TanHalfFoV, constant is fTanHalfFOV
+}
+
+FfxFloat32 JitterSequenceLength()
+{
+    return fJitterSequenceLength; // constant-buffer member returned unmodified (stored as a float, not an integer)
+}
+
+FfxFloat32 DeltaTime()
+{
+    return fDeltaTime; // constant-buffer member returned unmodified (time unit is not visible here -- confirm against the CPU side)
+}
+
+FfxFloat32 DynamicResChangeFactor()
+{
+    return fDynamicResChangeFactor; // constant-buffer member returned unmodified
+}
+
+FfxFloat32 ViewSpaceToMetersFactor()
+{
+    return fViewSpaceToMetersFactor; // constant-buffer member returned unmodified
+}
+
+
+SamplerState s_PointClamp : register(s0);  // slot s0; presumably the point-filtered static sampler of the root signature -- its filter line is not visible here
+SamplerState s_LinearClamp : register(s1); // slot s1; the root-signature text above gives s1 FILTER_MIN_MAG_MIP_LINEAR with clamp addressing
+
+// SRVs: read-only Texture2D inputs. In the FFX_INTERNAL branch every resource is declared unconditionally and FFX_FSR2_DECLARE_SRV/UAV is given its resource-identifier constant.
+#if defined(FFX_INTERNAL)
+    Texture2D<FfxFloat32x4>                       r_input_opaque_only                       : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY);
+    Texture2D<FfxFloat32x4>                       r_input_color_jittered                    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR);
+    Texture2D<FfxFloat32x4>                       r_input_motion_vectors                    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS);
+    Texture2D<FfxFloat32>                         r_input_depth                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH);
+    Texture2D<FfxFloat32x2>                       r_input_exposure                          : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE);
+    Texture2D<FfxFloat32x2>                       r_auto_exposure                           : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
+    Texture2D<FfxFloat32>                         r_reactive_mask                           : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK);
+    Texture2D<FfxFloat32>                         r_transparency_and_composition_mask       : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK);
+    Texture2D<FfxUInt32>                          r_reconstructed_previous_nearest_depth    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
+    Texture2D<FfxFloat32x2>                       r_dilated_motion_vectors                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
+    Texture2D<FfxFloat32x2>                       r_previous_dilated_motion_vectors         : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS);
+    Texture2D<FfxFloat32>                         r_dilatedDepth                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
+    Texture2D<FfxFloat32x4>                       r_internal_upscaled_color                 : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
+    Texture2D<unorm FfxFloat32x2>                 r_lock_status                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
+    Texture2D<FfxFloat32>                         r_lock_input_luma                         : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
+    Texture2D<unorm FfxFloat32>                   r_new_locks                               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
+    Texture2D<FfxFloat32x4>                       r_prepared_input_color                    : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
+    Texture2D<FfxFloat32x4>                       r_luma_history                            : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
+    Texture2D<FfxFloat32x4>                       r_rcas_input                              : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT);
+    Texture2D<FfxFloat32>                         r_lanczos_lut                             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT);
+    Texture2D<FfxFloat32>                         r_imgMips                                 : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE);
+    Texture2D<FfxFloat32>                         r_upsample_maximum_bias_lut               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT);
+    Texture2D<unorm FfxFloat32x2>                 r_dilated_reactive_masks                  : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
+    Texture2D<float3>                             r_input_prev_color_pre_alpha              : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
+    Texture2D<float3>                             r_input_prev_color_post_alpha             : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);
+
+    Texture2D<FfxFloat32x4>                       r_debug_out                               : FFX_FSR2_DECLARE_SRV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
+
+    // UAV declarations: writable RWTexture2D resources; several reuse the identifier of an SRV above under an rw_ name.
+    RWTexture2D<FfxUInt32>                        rw_reconstructed_previous_nearest_depth   : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH);
+    RWTexture2D<FfxFloat32x2>                     rw_dilated_motion_vectors                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS);
+    RWTexture2D<FfxFloat32>                       rw_dilatedDepth                           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH);
+    RWTexture2D<FfxFloat32x4>                     rw_internal_upscaled_color                : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR);
+    RWTexture2D<unorm FfxFloat32x2>               rw_lock_status                            : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS);
+    RWTexture2D<FfxFloat32>                       rw_lock_input_luma                        : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA);
+    RWTexture2D<unorm FfxFloat32>                 rw_new_locks                              : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS);
+    RWTexture2D<FfxFloat32x4>                     rw_prepared_input_color                   : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR);
+    RWTexture2D<FfxFloat32x4>                     rw_luma_history                           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY);
+    RWTexture2D<FfxFloat32x4>                     rw_upscaled_output                        : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT);
+
+    globallycoherent RWTexture2D<FfxFloat32>      rw_img_mip_shading_change                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE);
+    globallycoherent RWTexture2D<FfxFloat32>      rw_img_mip_5                              : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5);
+    RWTexture2D<unorm FfxFloat32x2>               rw_dilated_reactive_masks                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS);
+    RWTexture2D<FfxFloat32x2>                     rw_auto_exposure                          : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE);
+    globallycoherent RWTexture2D<FfxUInt32>       rw_spd_global_atomic                      : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT);
+    RWTexture2D<FfxFloat32x4>                     rw_debug_out                              : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT);
+    
+    RWTexture2D<float>                            rw_output_autoreactive                    : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE);
+    RWTexture2D<float>                            rw_output_autocomposition                 : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION);
+    RWTexture2D<float3>                           rw_output_prev_color_pre_alpha            : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR);
+    RWTexture2D<float3>                           rw_output_prev_color_post_alpha           : FFX_FSR2_DECLARE_UAV(FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR);  
+#else // !defined(FFX_INTERNAL): each SRV/UAV below is declared only when its FSR2_BIND_SRV_* / FSR2_BIND_UAV_* macro is defined
+    #if defined FSR2_BIND_SRV_INPUT_COLOR
+        Texture2D<FfxFloat32x4>                   r_input_color_jittered                    : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_COLOR);
+    #endif
+    #if defined FSR2_BIND_SRV_INPUT_OPAQUE_ONLY
+        Texture2D<FfxFloat32x4>                   r_input_opaque_only                       : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY);
+    #endif
+    #if defined FSR2_BIND_SRV_INPUT_MOTION_VECTORS
+        Texture2D<FfxFloat32x4>                   r_input_motion_vectors                    : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_MOTION_VECTORS);
+    #endif
+    #if defined FSR2_BIND_SRV_INPUT_DEPTH
+        Texture2D<FfxFloat32>                     r_input_depth                             : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_DEPTH);
+    #endif 
+    #if defined FSR2_BIND_SRV_INPUT_EXPOSURE
+        Texture2D<FfxFloat32x2>                   r_input_exposure                          : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INPUT_EXPOSURE);
+    #endif
+    #if defined FSR2_BIND_SRV_AUTO_EXPOSURE
+        Texture2D<FfxFloat32x2>                   r_auto_exposure                           : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_AUTO_EXPOSURE);
+    #endif
+    #if defined FSR2_BIND_SRV_REACTIVE_MASK
+        Texture2D<FfxFloat32>                     r_reactive_mask                           : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_REACTIVE_MASK);
+    #endif 
+    #if defined FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK
+        Texture2D<FfxFloat32>                     r_transparency_and_composition_mask       : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK);
+    #endif
+    #if defined FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH
+        Texture2D<FfxUInt32>                      r_reconstructed_previous_nearest_depth    : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
+    #endif 
+    #if defined FSR2_BIND_SRV_DILATED_MOTION_VECTORS
+       Texture2D<FfxFloat32x2>                    r_dilated_motion_vectors                  : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_MOTION_VECTORS);
+    #endif
+    #if defined FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS
+           Texture2D<FfxFloat32x2>                r_previous_dilated_motion_vectors         : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS);
+    #endif
+    #if defined FSR2_BIND_SRV_DILATED_DEPTH
+        Texture2D<FfxFloat32>                     r_dilatedDepth                            : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_DEPTH);
+    #endif
+    #if defined FSR2_BIND_SRV_INTERNAL_UPSCALED
+        Texture2D<FfxFloat32x4>                   r_internal_upscaled_color                 : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_INTERNAL_UPSCALED);
+    #endif
+    #if defined FSR2_BIND_SRV_LOCK_STATUS
+        Texture2D<unorm FfxFloat32x2>             r_lock_status                             : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_STATUS);
+    #endif
+    #if defined FSR2_BIND_SRV_LOCK_INPUT_LUMA
+        Texture2D<FfxFloat32>                     r_lock_input_luma                         : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LOCK_INPUT_LUMA);
+    #endif
+    #if defined FSR2_BIND_SRV_NEW_LOCKS
+        Texture2D<unorm FfxFloat32>               r_new_locks                               : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_NEW_LOCKS);
+    #endif
+    #if defined FSR2_BIND_SRV_PREPARED_INPUT_COLOR
+        Texture2D<FfxFloat32x4>                  r_prepared_input_color                    : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREPARED_INPUT_COLOR);
+    #endif
+    #if defined FSR2_BIND_SRV_LUMA_HISTORY
+        Texture2D<unorm FfxFloat32x4>             r_luma_history                            : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LUMA_HISTORY);
+    #endif
+    #if defined FSR2_BIND_SRV_RCAS_INPUT
+        Texture2D<FfxFloat32x4>                   r_rcas_input                              : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_RCAS_INPUT);
+    #endif
+    #if defined FSR2_BIND_SRV_LANCZOS_LUT
+        Texture2D<FfxFloat32>                     r_lanczos_lut                             : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_LANCZOS_LUT);
+    #endif
+    #if defined FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS
+        Texture2D<FfxFloat32>                     r_imgMips                                 : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS);
+    #endif
+    #if defined FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT
+        Texture2D<FfxFloat32>                     r_upsample_maximum_bias_lut               : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT);
+    #endif
+    #if defined FSR2_BIND_SRV_DILATED_REACTIVE_MASKS
+        Texture2D<unorm FfxFloat32x2>             r_dilated_reactive_masks                  : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS);
+    #endif
+
+    #if defined FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR
+        Texture2D<float3>                         r_input_prev_color_pre_alpha              : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR);
+    #endif // bound through the same FSR2_BIND_SRV_* macro that guards it, like the other SRVs in this branch
+    #if defined FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR
+        Texture2D<float3>                         r_input_prev_color_post_alpha             : FFX_FSR2_DECLARE_SRV(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR);
+    #endif // NOTE(review): confirm the pass files give these two macros slots that do not collide with other SRVs
+   
+    // UAV declarations: each is emitted only when its FSR2_BIND_UAV_* macro is defined, and that macro is the argument to FFX_FSR2_DECLARE_UAV. rw_exposure has no counterpart in the FFX_INTERNAL branch.
+    #if defined FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH
+        RWTexture2D<FfxUInt32>                    rw_reconstructed_previous_nearest_depth   : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH);
+    #endif
+    #if defined FSR2_BIND_UAV_DILATED_MOTION_VECTORS
+        RWTexture2D<FfxFloat32x2>                 rw_dilated_motion_vectors                 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_MOTION_VECTORS);
+    #endif
+    #if defined FSR2_BIND_UAV_DILATED_DEPTH
+        RWTexture2D<FfxFloat32>                   rw_dilatedDepth                           : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_DEPTH);
+    #endif
+    #if defined FSR2_BIND_UAV_INTERNAL_UPSCALED
+        RWTexture2D<FfxFloat32x4>                 rw_internal_upscaled_color                : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_INTERNAL_UPSCALED);
+    #endif
+    #if defined FSR2_BIND_UAV_LOCK_STATUS
+        RWTexture2D<unorm FfxFloat32x2>           rw_lock_status                            : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_STATUS);
+    #endif
+    #if defined FSR2_BIND_UAV_LOCK_INPUT_LUMA
+        RWTexture2D<FfxFloat32>                   rw_lock_input_luma                        : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LOCK_INPUT_LUMA);
+    #endif
+    #if defined FSR2_BIND_UAV_NEW_LOCKS
+        RWTexture2D<unorm FfxFloat32>             rw_new_locks                              : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_NEW_LOCKS);
+    #endif
+    #if defined FSR2_BIND_UAV_PREPARED_INPUT_COLOR
+        RWTexture2D<FfxFloat32x4>                 rw_prepared_input_color                   : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREPARED_INPUT_COLOR);
+    #endif
+    #if defined FSR2_BIND_UAV_LUMA_HISTORY
+        RWTexture2D<FfxFloat32x4>                 rw_luma_history                           : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_LUMA_HISTORY);
+    #endif
+    #if defined FSR2_BIND_UAV_UPSCALED_OUTPUT
+        RWTexture2D<FfxFloat32x4>                 rw_upscaled_output                        : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_UPSCALED_OUTPUT);
+    #endif
+    #if defined FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE
+        globallycoherent RWTexture2D<FfxFloat32>  rw_img_mip_shading_change                 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE);
+    #endif
+    #if defined FSR2_BIND_UAV_EXPOSURE_MIP_5
+        globallycoherent RWTexture2D<FfxFloat32>  rw_img_mip_5                              : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE_MIP_5);
+    #endif
+    #if defined FSR2_BIND_UAV_DILATED_REACTIVE_MASKS
+        RWTexture2D<unorm FfxFloat32x2>           rw_dilated_reactive_masks                 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS);
+    #endif
+    #if defined FSR2_BIND_UAV_EXPOSURE
+        RWTexture2D<FfxFloat32x2>                 rw_exposure                               : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_EXPOSURE);
+    #endif
+    #if defined FSR2_BIND_UAV_AUTO_EXPOSURE
+        RWTexture2D<FfxFloat32x2>                 rw_auto_exposure                          : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTO_EXPOSURE);
+    #endif
+    #if defined FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC
+        globallycoherent RWTexture2D<FfxUInt32>   rw_spd_global_atomic                      : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC);
+    #endif
+
+    #if defined FSR2_BIND_UAV_AUTOREACTIVE
+        RWTexture2D<float>                        rw_output_autoreactive                    : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOREACTIVE);
+    #endif
+    #if defined FSR2_BIND_UAV_AUTOCOMPOSITION
+        RWTexture2D<float>                        rw_output_autocomposition                 : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_AUTOCOMPOSITION);
+    #endif
+    #if defined FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR
+        RWTexture2D<float3>                       rw_output_prev_color_pre_alpha            : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR);
+    #endif
+    #if defined FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR
+        RWTexture2D<float3>                       rw_output_prev_color_post_alpha           : FFX_FSR2_DECLARE_UAV(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR);
+    #endif
+#endif // #if defined(FFX_INTERNAL)
+
+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
+FfxFloat32 LoadMipLuma(FfxUInt32x2 iPxPos, FfxUInt32 mipLevel)
+{
+    return r_imgMips.mips[mipLevel][iPxPos]; // unfiltered fetch: iPxPos is a texel coordinate inside mip 'mipLevel' of r_imgMips
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_SCENE_LUMINANCE_MIPS) || defined(FFX_INTERNAL)
+FfxFloat32 SampleMipLuma(FfxFloat32x2 fUV, FfxUInt32 mipLevel)
+{
+    return r_imgMips.SampleLevel(s_LinearClamp, fUV, mipLevel); // sampled through s_LinearClamp at fUV; mipLevel is passed as the explicit LOD
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
+FfxFloat32 LoadInputDepth(FfxUInt32x2 iPxPos)
+{
+    return r_input_depth[iPxPos]; // unfiltered fetch of the single-channel depth texel at iPxPos
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_DEPTH) || defined(FFX_INTERNAL)
+FfxFloat32 SampleInputDepth(FfxFloat32x2 fUV)
+{
+    return r_input_depth.SampleLevel(s_LinearClamp, fUV, 0).x; // sampled through s_LinearClamp at LOD 0; .x selects the only channel
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_REACTIVE_MASK) || defined(FFX_INTERNAL)
+FfxFloat32 LoadReactiveMask(FfxUInt32x2 iPxPos)
+{
+    return r_reactive_mask[iPxPos]; // unfiltered fetch of the single-channel reactive-mask texel at iPxPos
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK) || defined(FFX_INTERNAL)
+FfxFloat32 LoadTransparencyAndCompositionMask(FfxUInt32x2 iPxPos)
+{
+    return r_transparency_and_composition_mask[iPxPos]; // unfiltered fetch of the single-channel mask texel at iPxPos
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
+FfxFloat32x3 LoadInputColor(FfxUInt32x2 iPxPos)
+{
+    return r_input_color_jittered[iPxPos].rgb; // unfiltered fetch; the fourth (alpha) channel of the texel is dropped
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_COLOR) || defined(FFX_INTERNAL)
+FfxFloat32x3 SampleInputColor(FfxFloat32x2 fUV)
+{
+    return r_input_color_jittered.SampleLevel(s_LinearClamp, fUV, 0).rgb; // sampled through s_LinearClamp at LOD 0; alpha is dropped
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
+FfxFloat32x3 LoadPreparedInputColor(FfxUInt32x2 iPxPos)
+{
+    return r_prepared_input_color[iPxPos].xyz; // unfiltered fetch of the first three channels; .w is read separately by SampleDepthClip()
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_MOTION_VECTORS) || defined(FFX_INTERNAL)
+FfxFloat32x2 LoadInputMotionVector(FfxUInt32x2 iPxDilatedMotionVectorPos)
+{
+    // Fetch the raw .xy vector (unfiltered) and multiply it by MotionVectorScale().
+    FfxFloat32x2 fScaledMotion = r_input_motion_vectors[iPxDilatedMotionVectorPos].xy * MotionVectorScale();
+
+    // Only when the jittered-motion-vectors option is on, subtract MotionVectorJitterCancellation().
+#if FFX_FSR2_OPTION_JITTERED_MOTION_VECTORS
+    fScaledMotion = fScaledMotion - MotionVectorJitterCancellation();
+#endif
+
+    return fScaledMotion;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
+FfxFloat32x4 LoadHistory(FfxUInt32x2 iPxHistory)
+{
+    return r_internal_upscaled_color[iPxHistory]; // unfiltered fetch of all four channels of the internal upscaled color texel
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_LUMA_HISTORY) || defined(FFX_INTERNAL)
+void StoreLumaHistory(FfxUInt32x2 iPxPos, FfxFloat32x4 fLumaHistory)
+{
+    rw_luma_history[iPxPos] = fLumaHistory; // plain (non-atomic) write of all four channels to the luma-history UAV
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_LUMA_HISTORY) || defined(FFX_INTERNAL)
+FfxFloat32x4 SampleLumaHistory(FfxFloat32x2 fUV)
+{
+    return r_luma_history.SampleLevel(s_LinearClamp, fUV, 0); // sampled through s_LinearClamp at LOD 0; all four channels returned
+}
+#endif
+
+#if defined(FFX_INTERNAL)
+FfxFloat32x4 SampleDebug(FfxFloat32x2 fUV)
+{
+    return r_debug_out.SampleLevel(s_LinearClamp, fUV, 0).w; // NOTE(review): only .w is taken, yet the return type has four components -- confirm the scalar-to-vector return is intended
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
+void StoreReprojectedHistory(FfxUInt32x2 iPxHistory, FfxFloat32x4 fHistory)
+{
+    rw_internal_upscaled_color[iPxHistory] = fHistory; // plain write; same UAV that StoreInternalColorAndWeight() writes
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_INTERNAL_UPSCALED) || defined(FFX_INTERNAL)
+void StoreInternalColorAndWeight(FfxUInt32x2 iPxPos, FfxFloat32x4 fColorAndWeight)
+{
+    rw_internal_upscaled_color[iPxPos] = fColorAndWeight; // plain write; same UAV that StoreReprojectedHistory() writes
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_UPSCALED_OUTPUT) || defined(FFX_INTERNAL)
+void StoreUpscaledOutput(FfxUInt32x2 iPxPos, FfxFloat32x3 fColor)
+{
+    rw_upscaled_output[iPxPos] = FfxFloat32x4(fColor, 1.f); // the fourth channel of the output texel is always written as 1
+}
+#endif
+
+//LOCK_LIFETIME_REMAINING == 0
+//Should make LockInitialLifetime() return a const 1.0f later
+#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
+FfxFloat32x2 LoadLockStatus(FfxUInt32x2 iPxPos)
+{
+    return r_lock_status[iPxPos]; // unfiltered fetch of both channels of the (unorm) lock-status texel
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_LOCK_STATUS) || defined(FFX_INTERNAL)
+void StoreLockStatus(FfxUInt32x2 iPxPos, FfxFloat32x2 fLockStatus)
+{
+    rw_lock_status[iPxPos] = fLockStatus; // plain write; the UAV is declared unorm
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
+FfxFloat32 LoadLockInputLuma(FfxUInt32x2 iPxPos)
+{
+    return r_lock_input_luma[iPxPos]; // unfiltered fetch of the single-channel texel at iPxPos
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_LOCK_INPUT_LUMA) || defined(FFX_INTERNAL)
+void StoreLockInputLuma(FfxUInt32x2 iPxPos, FfxFloat32 fLuma)
+{
+    rw_lock_input_luma[iPxPos] = fLuma; // plain (non-atomic) write of one value to the lock-input-luma UAV
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_NEW_LOCKS) || defined(FFX_INTERNAL)
+FfxFloat32 LoadNewLocks(FfxUInt32x2 iPxPos)
+{
+    return r_new_locks[iPxPos]; // reads through the SRV view; LoadRwNewLocks() reads the same resource kind through the UAV
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
+FfxFloat32 LoadRwNewLocks(FfxUInt32x2 iPxPos)
+{
+    return rw_new_locks[iPxPos]; // reads through the UAV (rw_new_locks), not the SRV used by LoadNewLocks()
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_NEW_LOCKS) || defined(FFX_INTERNAL)
+void StoreNewLocks(FfxUInt32x2 iPxPos, FfxFloat32 newLock)
+{
+    rw_new_locks[iPxPos] = newLock; // plain write; the UAV is declared unorm
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
+void StorePreparedInputColor(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x4 fTonemapped)
+{
+    rw_prepared_input_color[iPxPos] = fTonemapped; // plain write of all four channels to the prepared-input-color UAV
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREPARED_INPUT_COLOR) || defined(FFX_INTERNAL)
+FfxFloat32 SampleDepthClip(FfxFloat32x2 fUV)
+{
+    return r_prepared_input_color.SampleLevel(s_LinearClamp, fUV, 0).w; // the value comes from the .w channel of the prepared input color, sampled at LOD 0
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_LOCK_STATUS) || defined(FFX_INTERNAL)
+FfxFloat32x2 SampleLockStatus(FfxFloat32x2 fUV)
+{
+    // Both lock-status channels, sampled through s_LinearClamp at LOD 0.
+    return r_lock_status.SampleLevel(s_LinearClamp, fUV, 0);
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
+FfxFloat32 LoadReconstructedPrevDepth(FfxUInt32x2 iPxPos)
+{
+    return asfloat(r_reconstructed_previous_nearest_depth[iPxPos]); // the texture holds uint texels; asfloat reinterprets the bits, it does not convert the value
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
+void StoreReconstructedDepth(FfxUInt32x2 iPxSample, FfxFloat32 fDepth)
+{
+    // The depth's raw bit pattern (asuint) is merged atomically into the texel:
+    // InterlockedMax when depth is inverted, InterlockedMin for standard depth.
+    #if FFX_FSR2_OPTION_INVERTED_DEPTH
+        InterlockedMax(rw_reconstructed_previous_nearest_depth[iPxSample], asuint(fDepth));
+    #else
+        InterlockedMin(rw_reconstructed_previous_nearest_depth[iPxSample], asuint(fDepth));
+    #endif
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH) || defined(FFX_INTERNAL)
+void SetReconstructedDepth(FfxUInt32x2 iPxSample, const FfxUInt32 uValue)
+{
+    rw_reconstructed_previous_nearest_depth[iPxSample] = uValue; // plain overwrite with a raw uint; unlike StoreReconstructedDepth() this is not an atomic min/max
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_DILATED_DEPTH) || defined(FFX_INTERNAL)
+void StoreDilatedDepth(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32 fDepth)
+{
+    rw_dilatedDepth[iPxPos] = fDepth; // plain write of one value to the dilated-depth UAV
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
+void StoreDilatedMotionVector(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fMotionVector)
+{
+    rw_dilated_motion_vectors[iPxPos] = fMotionVector; // plain write of both components to the dilated-motion-vector UAV
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
+FfxFloat32x2 LoadDilatedMotionVector(FfxUInt32x2 iPxInput)
+{
+    return r_dilated_motion_vectors[iPxInput].xy; // unfiltered fetch; no scale or jitter correction is applied here
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS) || defined(FFX_INTERNAL)
+FfxFloat32x2 LoadPreviousDilatedMotionVector(FfxUInt32x2 iPxInput)
+{
+    return r_previous_dilated_motion_vectors[iPxInput].xy; // unfiltered fetch at texel coordinate iPxInput
+}
+
+FfxFloat32x2 SamplePreviousDilatedMotionVector(FfxFloat32x2 uv)
+{
+    return r_previous_dilated_motion_vectors.SampleLevel(s_LinearClamp, uv, 0).xy; // same texture, sampled through s_LinearClamp at LOD 0
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_DEPTH) || defined(FFX_INTERNAL)
+FfxFloat32 LoadDilatedDepth(FfxUInt32x2 iPxInput)
+{
+    return r_dilatedDepth[iPxInput]; // unfiltered fetch of the single-channel dilated-depth texel
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_EXPOSURE) || defined(FFX_INTERNAL)
+FfxFloat32 Exposure()
+{
+    // The exposure value is read from the .x channel of texel (0, 0).
+    const FfxFloat32 fStoredExposure = r_input_exposure[FfxUInt32x2(0, 0)].x;
+
+    // An exact zero is replaced by 1.0 (presumably zero marks a texture that
+    // was never written -- verify against the producer); every other value,
+    // is handed back unchanged.
+    return (fStoredExposure == 0.0f) ? FfxFloat32(1.0f) : fStoredExposure;
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_AUTO_EXPOSURE) || defined(FFX_INTERNAL)
+FfxFloat32 AutoExposure()
+{
+    // The auto-exposure value is read from the .x channel of texel (0, 0).
+    const FfxFloat32 fStoredExposure = r_auto_exposure[FfxUInt32x2(0, 0)].x;
+
+    // An exact zero is replaced by 1.0 (presumably zero marks a texture that
+    // was never written -- verify against the producer); every other value,
+    // is handed back unchanged.
+    return (fStoredExposure == 0.0f) ? FfxFloat32(1.0f) : fStoredExposure;
+}
+#endif
+
+FfxFloat32 SampleLanczos2Weight(FfxFloat32 x) // always defined, unlike most accessors here; only the body depends on the LUT being bound
+{
+#if defined(FSR2_BIND_SRV_LANCZOS_LUT) || defined(FFX_INTERNAL)
+    return r_lanczos_lut.SampleLevel(s_LinearClamp, FfxFloat32x2(x / 2, 0.5f), 0); // x / 2 is the U coordinate, V is fixed at 0.5
+#else
+    return 0.f; // no LUT bound: the weight is a constant zero
+#endif
+}
+
+#if defined(FSR2_BIND_SRV_UPSCALE_MAXIMUM_BIAS_LUT) || defined(FFX_INTERNAL)
+FfxFloat32 SampleUpsampleMaximumBias(FfxFloat32x2 uv)
+{
+    // Stored as a SNORM, so make sure to multiply by 2 to retrieve the actual expected range.
+    return FfxFloat32(2.0) * r_upsample_maximum_bias_lut.SampleLevel(s_LinearClamp, abs(uv) * 2.0, 0); // the lookup coordinate is abs(uv) * 2, so the sign of uv is ignored
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
+FfxFloat32x2 SampleDilatedReactiveMasks(FfxFloat32x2 fUV)
+{
+	return r_dilated_reactive_masks.SampleLevel(s_LinearClamp, fUV, 0); // both channels, sampled through s_LinearClamp at LOD 0
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
+FfxFloat32x2 LoadDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos)
+{
+    return r_dilated_reactive_masks[iPxPos]; // unfiltered fetch of both channels at texel coordinate iPxPos
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_DILATED_REACTIVE_MASKS) || defined(FFX_INTERNAL)
+void StoreDilatedReactiveMasks(FFX_PARAMETER_IN FfxUInt32x2 iPxPos, FFX_PARAMETER_IN FfxFloat32x2 fDilatedReactiveMasks)
+{
+    rw_dilated_reactive_masks[iPxPos] = fDilatedReactiveMasks; // plain write of both channels; the UAV is declared unorm
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_INPUT_OPAQUE_ONLY) || defined(FFX_INTERNAL)
+FfxFloat32x3 LoadOpaqueOnly(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
+{
+    return r_input_opaque_only[iPxPos].xyz; // unfiltered fetch; the fourth channel of the texel is dropped
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
+FfxFloat32x3 LoadPrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
+{
+    return r_input_prev_color_pre_alpha[iPxPos]; // unfiltered fetch from a three-channel (float3) texture
+}
+#endif
+
+#if defined(FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
+FfxFloat32x3 LoadPrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos)
+{
+    return r_input_prev_color_post_alpha[iPxPos]; // unfiltered fetch from a three-channel (float3) texture
+}
+#endif
+
+#if (defined(FSR2_BIND_UAV_AUTOREACTIVE) || defined(FFX_INTERNAL)) && (defined(FSR2_BIND_UAV_AUTOCOMPOSITION) || defined(FFX_INTERNAL))
+// Splits a two-component value across two single-channel UAVs; both must be declared for this function to exist.
+void StoreAutoReactive(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F2 fReactive)
+{
+    // .x goes to the auto-reactive output.
+    rw_output_autoreactive[iPxPos] = fReactive.x;
+    // .y goes to the auto-composition output.
+    rw_output_autocomposition[iPxPos] = fReactive.y;
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR) || defined(FFX_INTERNAL)
+void StorePrevPreAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
+{
+    rw_output_prev_color_pre_alpha[iPxPos] = color; // plain write of a three-component color to the pre-alpha UAV
+
+}
+#endif
+
+#if defined(FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR) || defined(FFX_INTERNAL)
+void StorePrevPostAlpha(FFX_PARAMETER_IN FFX_MIN16_I2 iPxPos, FFX_PARAMETER_IN FFX_MIN16_F3 color)
+{
+    rw_output_prev_color_post_alpha[iPxPos] = color; // plain write of a three-component color to the post-alpha UAV
+}
+#endif
+
+#endif // #if defined(FFX_GPU)
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h
new file mode 100644
index 0000000000..0c72aa8494
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_common.h
@@ -0,0 +1,565 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#if !defined(FFX_FSR2_COMMON_H)
+#define FFX_FSR2_COMMON_H
+
+#if defined(FFX_CPU) || defined(FFX_GPU)
+//Locks
+#define LOCK_LIFETIME_REMAINING 0
+#define LOCK_TEMPORAL_LUMA 1
+#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
+
+#if defined(FFX_GPU)
+FFX_STATIC const FfxFloat32 FSR2_FP16_MIN = 6.10e-05f; // approximately the smallest positive normal half-float (2^-14)
+FFX_STATIC const FfxFloat32 FSR2_FP16_MAX = 65504.0f; // largest finite half-float
+FFX_STATIC const FfxFloat32 FSR2_EPSILON = 1e-03f;
+FFX_STATIC const FfxFloat32 FSR2_TONEMAP_EPSILON = 1.0f / FSR2_FP16_MAX; // lower bound for the InverseTonemap() denominator
+FFX_STATIC const FfxFloat32 FSR2_FLT_MAX = 3.402823466e+38f; // largest finite 32-bit float
+FFX_STATIC const FfxFloat32 FSR2_FLT_MIN = 1.175494351e-38f; // smallest positive normal 32-bit float
+
+// treat vector truncation warnings as errors
+#pragma warning(error: 3206)
+
+// suppress warnings
+#pragma warning(disable: 3205)  // conversion from larger type to smaller
+#pragma warning(disable: 3571)  // in ffxPow(f, e), f could be negative
+
+// Reconstructed depth usage
+FFX_STATIC const FfxFloat32 fReconstructedDepthBilinearWeightThreshold = 0.01f;
+
+// Accumulation
+FFX_STATIC const FfxFloat32 fUpsampleLanczosWeightScale = 1.0f / 12.0f;
+FFX_STATIC const FfxFloat32 fMaxAccumulationLanczosWeight = 1.0f;
+FFX_STATIC const FfxFloat32 fAverageLanczosWeightPerFrame = 0.74f * fUpsampleLanczosWeightScale; // Average lanczos weight for jitter accumulated samples
+FFX_STATIC const FfxFloat32 fAccumulationMaxOnMotion = 3.0f * fUpsampleLanczosWeightScale;
+
+// Auto exposure
+FFX_STATIC const FfxFloat32 resetAutoExposureAverageSmoothing = 1e8f;
+
+struct AccumulationPassCommonParams
+{
+    FfxInt32x2 iPxHrPos;
+    FfxFloat32x2 fHrUv;
+    FfxFloat32x2 fLrUv_HwSampler;
+    FfxFloat32x2 fMotionVector;
+    FfxFloat32x2 fReprojectedHrUv;
+    FfxFloat32 fHrVelocity;
+    FfxFloat32 fDepthClipFactor;
+    FfxFloat32 fDilatedReactiveFactor;
+    FfxFloat32 fAccumulationMask;
+
+    FfxBoolean bIsResetFrame;
+    FfxBoolean bIsExistingSample;
+    FfxBoolean bIsNewSample;
+};
+
+struct LockState
+{
+    FfxBoolean NewLock; //Set for both unique new and re-locked new
+    FfxBoolean WasLockedPrevFrame; //Set to identify if the pixel was already locked (relock)
+};
+
+void InitializeNewLockSample(FFX_PARAMETER_OUT FfxFloat32x2 fLockStatus)
+{
+    fLockStatus = FfxFloat32x2(0, 0);
+}
+
+#if FFX_HALF
+void InitializeNewLockSample(FFX_PARAMETER_OUT FFX_MIN16_F2 fLockStatus)
+{
+    fLockStatus = FFX_MIN16_F2(0, 0);
+}
+#endif
+
+
+void KillLock(FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus)
+{
+    fLockStatus[LOCK_LIFETIME_REMAINING] = 0; // zeroes component 0 only; component 1 (LOCK_TEMPORAL_LUMA) is left untouched
+}
+
+#if FFX_HALF
+void KillLock(FFX_PARAMETER_INOUT FFX_MIN16_F2 fLockStatus)
+{
+    fLockStatus[LOCK_LIFETIME_REMAINING] = FFX_MIN16_F(0);
+}
+#endif
+
+struct RectificationBox
+{
+    FfxFloat32x3 boxCenter;
+    FfxFloat32x3 boxVec;
+    FfxFloat32x3 aabbMin;
+    FfxFloat32x3 aabbMax;
+    FfxFloat32 fBoxCenterWeight;
+};
+#if FFX_HALF
+struct RectificationBoxMin16
+{
+    FFX_MIN16_F3 boxCenter;
+    FFX_MIN16_F3 boxVec;
+    FFX_MIN16_F3 aabbMin;
+    FFX_MIN16_F3 aabbMax;
+    FFX_MIN16_F fBoxCenterWeight;
+};
+#endif
+
+void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
+{
+    rectificationBox.fBoxCenterWeight = FfxFloat32(0);
+    // Accumulators start at zero; the AABB starts inverted (min = +FLT_MAX, max = -FLT_MAX) so the first min/max against a sample replaces both bounds.
+    rectificationBox.boxCenter = FfxFloat32x3(0, 0, 0);
+    rectificationBox.boxVec = FfxFloat32x3(0, 0, 0);
+    rectificationBox.aabbMin = FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX);
+    rectificationBox.aabbMax = -FfxFloat32x3(FSR2_FLT_MAX, FSR2_FLT_MAX, FSR2_FLT_MAX);
+}
+#if FFX_HALF
+void RectificationBoxReset(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
+{
+    rectificationBox.fBoxCenterWeight = FFX_MIN16_F(0);
+
+    rectificationBox.boxCenter = FFX_MIN16_F3(0, 0, 0);
+    rectificationBox.boxVec = FFX_MIN16_F3(0, 0, 0);
+    rectificationBox.aabbMin = FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX);
+    rectificationBox.aabbMax = -FFX_MIN16_F3(FSR2_FP16_MAX, FSR2_FP16_MAX, FSR2_FP16_MAX);
+}
+#endif
+
+void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
+{
+    // First sample: overwrite (not accumulate) the weight total, the weighted sum and the weighted sum of squares.
+    const FfxFloat32x3 fWeightedColor = colorSample * fSampleWeight;
+    rectificationBox.fBoxCenterWeight = fSampleWeight;
+    rectificationBox.boxCenter = fWeightedColor;
+    rectificationBox.boxVec = colorSample * fWeightedColor;
+    rectificationBox.aabbMin = colorSample; // the AABB collapses onto the single sample
+    rectificationBox.aabbMax = colorSample;
+}
+
+void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBox rectificationBox, const FfxFloat32x3 colorSample, const FfxFloat32 fSampleWeight)
+{
+    // The initial sample replaces the box contents; every later sample is merged into them.
+    if (bInitialSample) {
+        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
+        return;
+    }
+    const FfxFloat32x3 fWeightedColor = colorSample * fSampleWeight;
+    rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
+    rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
+    rectificationBox.boxCenter += fWeightedColor;
+    rectificationBox.boxVec += colorSample * fWeightedColor;
+    rectificationBox.fBoxCenterWeight += fSampleWeight;
+}
+#if FFX_HALF
+void RectificationBoxAddInitialSample(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight)
+{
+    // First sample (FP16 variant): overwrite the weight total, the weighted sum and the weighted sum of squares.
+    const FFX_MIN16_F3 fWeightedColor = colorSample * fSampleWeight;
+    rectificationBox.fBoxCenterWeight = fSampleWeight;
+    rectificationBox.boxCenter = fWeightedColor;
+    rectificationBox.boxVec = colorSample * fWeightedColor;
+    rectificationBox.aabbMin = colorSample; // the AABB collapses onto the single sample
+    rectificationBox.aabbMax = colorSample;
+}
+
+void RectificationBoxAddSample(FfxBoolean bInitialSample, FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox, const FFX_MIN16_F3 colorSample, const FFX_MIN16_F fSampleWeight)
+{
+    // FP16 variant: the initial sample replaces the box contents; every later sample is merged into them.
+    if (bInitialSample) {
+        RectificationBoxAddInitialSample(rectificationBox, colorSample, fSampleWeight);
+        return;
+    }
+    const FFX_MIN16_F3 fWeightedColor = colorSample * fSampleWeight;
+    rectificationBox.aabbMin = ffxMin(rectificationBox.aabbMin, colorSample);
+    rectificationBox.aabbMax = ffxMax(rectificationBox.aabbMax, colorSample);
+    rectificationBox.boxCenter += fWeightedColor;
+    rectificationBox.boxVec += colorSample * fWeightedColor;
+    rectificationBox.fBoxCenterWeight += fSampleWeight;
+}
+#endif
+
+void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBox rectificationBox)
+{
+    rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FfxFloat32(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FfxFloat32(1.f)); // a near-zero total weight is replaced by 1 so the divisions below stay finite
+    rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight; // weighted mean
+    rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight; // weighted mean of squares
+    FfxFloat32x3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter)); // sqrt(|E[x^2] - E[x]^2|); abs() keeps the sqrt argument non-negative
+    rectificationBox.boxVec = stdDev; // from here on boxVec holds the per-channel standard deviation
+}
+#if FFX_HALF
+void RectificationBoxComputeVarianceBoxData(FFX_PARAMETER_INOUT RectificationBoxMin16 rectificationBox)
+{
+    rectificationBox.fBoxCenterWeight = (abs(rectificationBox.fBoxCenterWeight) > FFX_MIN16_F(FSR2_EPSILON) ? rectificationBox.fBoxCenterWeight : FFX_MIN16_F(1.f));
+    rectificationBox.boxCenter /= rectificationBox.fBoxCenterWeight;
+    rectificationBox.boxVec /= rectificationBox.fBoxCenterWeight;
+    FFX_MIN16_F3 stdDev = sqrt(abs(rectificationBox.boxVec - rectificationBox.boxCenter * rectificationBox.boxCenter));
+    rectificationBox.boxVec = stdDev;
+}
+#endif
+
+FfxFloat32x3 SafeRcp3(FfxFloat32x3 v)
+{
+    return (all(FFX_NOT_EQUAL(v, FfxFloat32x3(0, 0, 0)))) ? (FfxFloat32x3(1, 1, 1) / v) : FfxFloat32x3(0, 0, 0); // NOTE: all-or-nothing — if any component is zero the whole result is (0, 0, 0), not just that component (assumes FFX_NOT_EQUAL compares per component)
+}
+#if FFX_HALF
+FFX_MIN16_F3 SafeRcp3(FFX_MIN16_F3 v)
+{
+    return (all(FFX_NOT_EQUAL(v, FFX_MIN16_F3(0, 0, 0)))) ? (FFX_MIN16_F3(1, 1, 1) / v) : FFX_MIN16_F3(0, 0, 0);
+}
+#endif
+
+FfxFloat32 MinDividedByMax(const FfxFloat32 v0, const FfxFloat32 v1)
+{
+    const FfxFloat32 fLarger = ffxMax(v0, v1); // denominator; a zero maximum yields 0 instead of a division
+    return (fLarger == 0) ? 0 : (ffxMin(v0, v1) / fLarger);
+}
+
+#if FFX_HALF
+FFX_MIN16_F MinDividedByMax(const FFX_MIN16_F v0, const FFX_MIN16_F v1)
+{
+    const FFX_MIN16_F fLarger = ffxMax(v0, v1); // denominator; a zero maximum yields 0 instead of a division
+    return (fLarger == FFX_MIN16_F(0)) ? FFX_MIN16_F(0) : (ffxMin(v0, v1) / fLarger);
+}
+#endif
+
+FfxFloat32x3 YCoCgToRGB(FfxFloat32x3 fYCoCg)
+{
+    // Input is packed as x = Y, y = Co, z = Cg; algebraic inverse of RGBToYCoCg().
+    const FfxFloat32 fY = fYCoCg.x;
+    const FfxFloat32 fCo = fYCoCg.y;
+    const FfxFloat32 fCg = fYCoCg.z;
+    const FfxFloat32 fRed = fY + fCo - fCg;
+    const FfxFloat32 fGreen = fY + fCg;
+    const FfxFloat32 fBlue = fY - fCo - fCg;
+    return FfxFloat32x3(fRed, fGreen, fBlue);
+}
+#if FFX_HALF
+FFX_MIN16_F3 YCoCgToRGB(FFX_MIN16_F3 fYCoCg)
+{
+    FFX_MIN16_F3 fRgb;
+
+    fRgb = FFX_MIN16_F3(
+        fYCoCg.x + fYCoCg.y - fYCoCg.z,
+        fYCoCg.x + fYCoCg.z,
+        fYCoCg.x - fYCoCg.y - fYCoCg.z);
+
+    return fRgb;
+}
+#endif
+
+FfxFloat32x3 RGBToYCoCg(FfxFloat32x3 fRgb)
+{
+    // Forward transform; the result is packed as x = Y, y = Co, z = Cg.
+    // Y  =  R/4 + G/2 + B/4
+    const FfxFloat32 fY = 0.25f * fRgb.r + 0.5f * fRgb.g + 0.25f * fRgb.b;
+    // Co =  R/2 - B/2
+    const FfxFloat32 fCo = 0.5f * fRgb.r - 0.5f * fRgb.b;
+    // Cg = -R/4 + G/2 - B/4
+    const FfxFloat32 fCg = -0.25f * fRgb.r + 0.5f * fRgb.g - 0.25f * fRgb.b;
+    return FfxFloat32x3(fY, fCo, fCg);
+}
+#if FFX_HALF
+FFX_MIN16_F3 RGBToYCoCg(FFX_MIN16_F3 fRgb)
+{
+    FFX_MIN16_F3 fYCoCg;
+
+    fYCoCg = FFX_MIN16_F3(
+        0.25 * fRgb.r + 0.5 * fRgb.g + 0.25 * fRgb.b,
+        0.5 * fRgb.r - 0.5 * fRgb.b,
+        -0.25 * fRgb.r + 0.5 * fRgb.g - 0.25 * fRgb.b);
+
+    return fYCoCg;
+}
+#endif
+
+FfxFloat32 RGBToLuma(FfxFloat32x3 fLinearRgb)
+{
+    return dot(fLinearRgb, FfxFloat32x3(0.2126f, 0.7152f, 0.0722f)); // weights match the Rec. 709 luminance coefficients
+}
+#if FFX_HALF
+FFX_MIN16_F RGBToLuma(FFX_MIN16_F3 fLinearRgb)
+{
+    return dot(fLinearRgb, FFX_MIN16_F3(0.2126f, 0.7152f, 0.0722f));
+}
+#endif
+
+FfxFloat32 RGBToPerceivedLuma(FfxFloat32x3 fLinearRgb)
+{
+    // Piecewise lightness curve (constants match CIE L*): linear near black, cube root above; scaled by 0.01 on return.
+    const FfxFloat32 fLuma = RGBToLuma(fLinearRgb);
+    const FfxBoolean bLinearSegment = fLuma <= 216.0f / 24389.0f;
+
+    FfxFloat32 fLightness = 0;
+    if (bLinearSegment) {
+        fLightness = fLuma * (24389.0f / 27.0f);
+    } else {
+        fLightness = ffxPow(fLuma, 1.0f / 3.0f) * 116.0f - 16.0f;
+    }
+    return fLightness * 0.01f;
+}
+#if FFX_HALF
+FFX_MIN16_F RGBToPerceivedLuma(FFX_MIN16_F3 fLinearRgb)
+{
+    FFX_MIN16_F fLuminance = RGBToLuma(fLinearRgb);
+
+    FFX_MIN16_F fPercievedLuminance = FFX_MIN16_F(0);
+    if (fLuminance <= FFX_MIN16_F(216.0f / 24389.0f)) {
+        fPercievedLuminance = fLuminance * FFX_MIN16_F(24389.0f / 27.0f);
+    }
+    else {
+        fPercievedLuminance = ffxPow(fLuminance, FFX_MIN16_F(1.0f / 3.0f)) * FFX_MIN16_F(116.0f) - FFX_MIN16_F(16.0f);
+    }
+
+    return fPercievedLuminance * FFX_MIN16_F(0.01f);
+}
+#endif
+
+FfxFloat32x3 Tonemap(FfxFloat32x3 fRgb)
+{
+    return fRgb / (ffxMax(ffxMax(0.f, fRgb.r), ffxMax(fRgb.g, fRgb.b)) + 1.f).xxx; // rgb / (1 + max(0, r, g, b)); the max(0, ...) keeps the divisor at or above 1
+}
+
+FfxFloat32x3 InverseTonemap(FfxFloat32x3 fRgb)
+{
+    return fRgb / ffxMax(FSR2_TONEMAP_EPSILON, 1.f - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx; // rgb / (1 - max(r, g, b)) undoes Tonemap(); the epsilon keeps the divisor positive when the max reaches 1
+}
+
+#if FFX_HALF
+FFX_MIN16_F3 Tonemap(FFX_MIN16_F3 fRgb)
+{
+    return fRgb / (ffxMax(ffxMax(FFX_MIN16_F(0.f), fRgb.r), ffxMax(fRgb.g, fRgb.b)) + FFX_MIN16_F(1.f)).xxx;
+}
+
+FFX_MIN16_F3 InverseTonemap(FFX_MIN16_F3 fRgb)
+{
+    return fRgb / ffxMax(FFX_MIN16_F(FSR2_TONEMAP_EPSILON), FFX_MIN16_F(1.f) - ffxMax(fRgb.r, ffxMax(fRgb.g, fRgb.b))).xxx;
+}
+#endif
+
+FfxInt32x2 ClampLoad(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
+{
+    FfxInt32x2 result = iPxSample + iPxOffset;
+    result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; // negative x offset: clamp at 0
+    result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; // positive x offset: clamp at width - 1
+    result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; // negative y offset: clamp at 0
+    result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y; // positive y offset: clamp at height - 1
+    return result;
+    // Only the edge the offset points towards is clamped; with a zero offset an out-of-range iPxSample passes through unchanged.
+    // return ffxMed3(iPxSample + iPxOffset, FfxInt32x2(0, 0), iTextureSize - FfxInt32x2(1, 1));
+}
+#if FFX_HALF
+FFX_MIN16_I2 ClampLoad(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
+{
+    FFX_MIN16_I2 result = iPxSample + iPxOffset;
+    result.x = (iPxOffset.x < 0) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x;
+    result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x;
+    result.y = (iPxOffset.y < 0) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y;
+    result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y;
+    return result;
+
+    // return ffxMed3Half(iPxSample + iPxOffset, FFX_MIN16_I2(0, 0), iTextureSize - FFX_MIN16_I2(1, 1));
+}
+#endif
+
+FfxFloat32x2 ClampUv(FfxFloat32x2 fUv, FfxInt32x2 iTextureSize, FfxInt32x2 iResourceSize)
+{
+    const FfxFloat32x2 fSampleLocation = fUv * iTextureSize; // UV -> texel units of the iTextureSize region
+    const FfxFloat32x2 fClampedLocation = ffxMax(FfxFloat32x2(0.5f, 0.5f), ffxMin(fSampleLocation, FfxFloat32x2(iTextureSize) - FfxFloat32x2(0.5f, 0.5f))); // stay at least half a texel inside that region
+    const FfxFloat32x2 fClampedUv = fClampedLocation / FfxFloat32x2(iResourceSize);
+    // The result is normalised by iResourceSize rather than iTextureSize, so the two sizes are allowed to differ.
+    return fClampedUv;
+}
+
+FfxBoolean IsOnScreen(FfxInt32x2 pos, FfxInt32x2 size)
+{
+    return all(FFX_LESS_THAN(FfxUInt32x2(pos), FfxUInt32x2(size))); // unsigned compare: a negative pos converts to a very large value, so one test covers both pos >= 0 and pos < size
+}
+#if FFX_HALF
+FfxBoolean IsOnScreen(FFX_MIN16_I2 pos, FFX_MIN16_I2 size)
+{
+    return all(FFX_LESS_THAN(FFX_MIN16_U2(pos), FFX_MIN16_U2(size)));
+}
+#endif
+
+FfxFloat32 ComputeAutoExposureFromLavg(FfxFloat32 Lavg)
+{
+    Lavg = exp(Lavg);
+    // The input is exponentiated first, i.e. it is treated as a natural-log value. Then Lmax = 78 / (q * S) * 2^EV with EV = log2(Lavg * S / K).
+    const FfxFloat32 S = 100.0f; //ISO arithmetic speed
+    const FfxFloat32 K = 12.5f;
+    FfxFloat32 ExposureISO100 = log2((Lavg * S) / K);
+
+    const FfxFloat32 q = 0.65f;
+    FfxFloat32 Lmax = (78.0f / (q * S)) * ffxPow(2.0f, ExposureISO100);
+    // The returned exposure is the reciprocal of Lmax.
+    return 1 / Lmax;
+}
+#if FFX_HALF
+FFX_MIN16_F ComputeAutoExposureFromLavg(FFX_MIN16_F Lavg)
+{
+    Lavg = exp(Lavg);
+
+    const FFX_MIN16_F S = FFX_MIN16_F(100.0f); //ISO arithmetic speed
+    const FFX_MIN16_F K = FFX_MIN16_F(12.5f);
+    const FFX_MIN16_F ExposureISO100 = log2((Lavg * S) / K);
+
+    const FFX_MIN16_F q = FFX_MIN16_F(0.65f);
+    const FFX_MIN16_F Lmax = (FFX_MIN16_F(78.0f) / (q * S)) * ffxPow(FFX_MIN16_F(2.0f), ExposureISO100);
+
+    return FFX_MIN16_F(1) / Lmax;
+}
+#endif
+
+FfxInt32x2 ComputeHrPosFromLrPos(FfxInt32x2 iPxLrPos)
+{
+    FfxFloat32x2 fSrcJitteredPos = FfxFloat32x2(iPxLrPos) + 0.5f - Jitter(); // pixel centre (+0.5) with Jitter() subtracted
+    FfxFloat32x2 fLrPosInHr = (fSrcJitteredPos / RenderSize()) * DisplaySize(); // rescale from RenderSize() to DisplaySize() pixel units
+    FfxInt32x2 iPxHrPos = FfxInt32x2(floor(fLrPosInHr)); // integer pixel containing that position
+    return iPxHrPos;
+}
+#if FFX_HALF
+FFX_MIN16_I2 ComputeHrPosFromLrPos(FFX_MIN16_I2 iPxLrPos)
+{
+    FFX_MIN16_F2 fSrcJitteredPos = FFX_MIN16_F2(iPxLrPos) + FFX_MIN16_F(0.5f) - FFX_MIN16_F2(Jitter());
+    FFX_MIN16_F2 fLrPosInHr = (fSrcJitteredPos / FFX_MIN16_F2(RenderSize())) * FFX_MIN16_F2(DisplaySize());
+    FFX_MIN16_I2 iPxHrPos = FFX_MIN16_I2(floor(fLrPosInHr));
+    return iPxHrPos;
+}
+#endif
+
+FfxFloat32x2 ComputeNdc(FfxFloat32x2 fPxPos, FfxInt32x2 iSize)
+{
+    return fPxPos / FfxFloat32x2(iSize) * FfxFloat32x2(2.0f, -2.0f) + FfxFloat32x2(-1.0f, 1.0f); // x: [0, iSize.x] -> [-1, 1]; y: [0, iSize.y] -> [1, -1] (y axis is flipped)
+}
+
+FfxFloat32 GetViewSpaceDepth(FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors();
+    // Uses factors [0] and [1] only: depth = f[1] / (fDeviceDepth - f[0]). There is no guard for fDeviceDepth == f[0].
+    // fDeviceToViewDepth details found in ffx_fsr2.cpp
+    return (fDeviceToViewDepth[1] / (fDeviceDepth - fDeviceToViewDepth[0]));
+}
+
+FfxFloat32 GetViewSpaceDepthInMeters(FfxFloat32 fDeviceDepth)
+{
+    return GetViewSpaceDepth(fDeviceDepth) * ViewSpaceToMetersFactor();
+}
+
+FfxFloat32x3 GetViewSpacePosition(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
+{
+    const FfxFloat32x4 fDeviceToViewDepth = DeviceToViewSpaceTransformFactors();
+    // Z comes from the depth value; X and Y scale the pixel's NDC position by Z and by factors [2] and [3] respectively.
+    const FfxFloat32 Z = GetViewSpaceDepth(fDeviceDepth);
+
+    const FfxFloat32x2 fNdcPos = ComputeNdc(iViewportPos, iViewportSize); // the integer pixel position is passed as-is (no +0.5 centre offset)
+    const FfxFloat32 X = fDeviceToViewDepth[2] * fNdcPos.x * Z;
+    const FfxFloat32 Y = fDeviceToViewDepth[3] * fNdcPos.y * Z;
+
+    return FfxFloat32x3(X, Y, Z);
+}
+
+FfxFloat32x3 GetViewSpacePositionInMeters(FfxInt32x2 iViewportPos, FfxInt32x2 iViewportSize, FfxFloat32 fDeviceDepth)
+{
+    return GetViewSpacePosition(iViewportPos, iViewportSize, fDeviceDepth) * ViewSpaceToMetersFactor();
+}
+
+FfxFloat32 GetMaxDistanceInMeters()
+{
+#if FFX_FSR2_OPTION_INVERTED_DEPTH
+    return GetViewSpaceDepthInMeters(0.0f); // inverted depth: device depth 0 is the value evaluated
+#else
+    return GetViewSpaceDepthInMeters(1.0f); // regular depth: device depth 1 is the value evaluated
+#endif
+}
+
+FfxFloat32x3 PrepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure, FfxFloat32 fPreExposure)
+{
+    fRgb /= fPreExposure; // divide out the caller-supplied pre-exposure (no zero guard)
+    fRgb *= fExposure;
+    // Clamp every channel to [0, FSR2_FP16_MAX].
+    fRgb = clamp(fRgb, 0.0f, FSR2_FP16_MAX);
+
+    return fRgb;
+}
+
+FfxFloat32x3 UnprepareRgb(FfxFloat32x3 fRgb, FfxFloat32 fExposure)
+{
+    fRgb /= fExposure;
+    fRgb *= PreExposure();
+    // Unlike PrepareRgb(), the pre-exposure is read from PreExposure() rather than passed in, and no clamp is applied.
+    return fRgb;
+}
+
+
+struct BilinearSamplingData
+{
+    FfxInt32x2 iOffsets[4];
+    FfxFloat32 fWeights[4];
+    FfxInt32x2 iBasePos;
+};
+
+BilinearSamplingData GetBilinearSamplingData(FfxFloat32x2 fUv, FfxInt32x2 iSize)
+{
+    BilinearSamplingData data;
+    // Shift by half a texel so that integer coordinates coincide with texel centres.
+    FfxFloat32x2 fPxSample = (fUv * iSize) - FfxFloat32x2(0.5f, 0.5f);
+    data.iBasePos = FfxInt32x2(floor(fPxSample)); // lowest-coordinate texel of the 2x2 footprint; can be -1 when fUv is within half a texel of 0
+    FfxFloat32x2 fPxFrac = ffxFract(fPxSample);
+    // Offsets of the four taps relative to iBasePos, in the same order as fWeights.
+    data.iOffsets[0] = FfxInt32x2(0, 0);
+    data.iOffsets[1] = FfxInt32x2(1, 0);
+    data.iOffsets[2] = FfxInt32x2(0, 1);
+    data.iOffsets[3] = FfxInt32x2(1, 1);
+    // Bilinear weights from the fractional position; the four values sum to 1.
+    data.fWeights[0] = (1 - fPxFrac.x) * (1 - fPxFrac.y);
+    data.fWeights[1] = (fPxFrac.x) * (1 - fPxFrac.y);
+    data.fWeights[2] = (1 - fPxFrac.x) * (fPxFrac.y);
+    data.fWeights[3] = (fPxFrac.x) * (fPxFrac.y);
+
+    return data;
+}
+
+struct PlaneData
+{
+    FfxFloat32x3 fNormal;
+    FfxFloat32 fDistanceFromOrigin;
+};
+
+PlaneData GetPlaneFromPoints(FfxFloat32x3 fP0, FfxFloat32x3 fP1, FfxFloat32x3 fP2)
+{
+    PlaneData plane;
+    // Two edge vectors sharing fP0; their normalised cross product is the plane normal.
+    FfxFloat32x3 v0 = fP0 - fP1;
+    FfxFloat32x3 v1 = fP0 - fP2;
+    plane.fNormal = normalize(cross(v0, v1)); // collinear or coincident points give a zero cross product, for which normalize() has no meaningful result
+    plane.fDistanceFromOrigin = -dot(fP0, plane.fNormal); // chosen so that dot(fNormal, p) + fDistanceFromOrigin == 0 for p on the plane
+
+    return plane;
+}
+
+FfxFloat32 PointToPlaneDistance(PlaneData plane, FfxFloat32x3 fPoint)
+{
+    return abs(dot(plane.fNormal, fPoint) + plane.fDistanceFromOrigin);
+}
+
+#endif // #if defined(FFX_GPU)
+
+#endif //!defined(FFX_FSR2_COMMON_H)
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
new file mode 100644
index 0000000000..c63f1820e0
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid.h
@@ -0,0 +1,189 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+FFX_GROUPSHARED FfxUInt32 spdCounter;
+
+#ifndef SPD_PACKED_ONLY
+FFX_GROUPSHARED FfxFloat32 spdIntermediateR[16][16];
+FFX_GROUPSHARED FfxFloat32 spdIntermediateG[16][16];
+FFX_GROUPSHARED FfxFloat32 spdIntermediateB[16][16];
+FFX_GROUPSHARED FfxFloat32 spdIntermediateA[16][16];
+
+FfxFloat32x4 SpdLoadSourceImage(FfxFloat32x2 tex, FfxUInt32 slice)
+{
+    FfxFloat32x2 fUv = (tex + 0.5f + Jitter()) / RenderSize();
+    fUv = ClampUv(fUv, RenderSize(), InputColorResourceDimensions());
+    FfxFloat32x3 fRgb = SampleInputColor(fUv);
+
+    fRgb /= PreExposure();
+    // Luma is floored at FSR2_EPSILON so log() never receives zero.
+    //compute log luma
+    const FfxFloat32 fLogLuma = log(ffxMax(FSR2_EPSILON, RGBToLuma(fRgb)));
+
+    // Make sure out of screen pixels contribute no value to the end result
+    const FfxFloat32 result = all(FFX_LESS_THAN(tex, RenderSize())) ? fLogLuma : 0.0f;
+
+    return FfxFloat32x4(result, 0, 0, 0); // only .x carries data; the `slice` argument is not used
+}
+
+FfxFloat32x4 SpdLoad(FfxInt32x2 tex, FfxUInt32 slice)
+{
+    return SPD_LoadMipmap5(tex);
+}
+
+void SpdStore(FfxInt32x2 pix, FfxFloat32x4 outValue, FfxUInt32 index, FfxUInt32 slice)
+{
+    if (index == LumaMipLevelToUse() || index == 5)
+    {
+        SPD_SetMipmap(pix, index, outValue.r);
+    }
+    // Only those two mip levels are written out; the `slice` argument is not used.
+    if (index == MipCount() - 1) { //accumulate on 1x1 level
+        // Exposure buffer layout: .x = exposure, .y = the average log-luma it was derived from.
+        if (all(FFX_EQUAL(pix, FfxInt32x2(0, 0))))
+        {
+            FfxFloat32 prev = SPD_LoadExposureBuffer().y;
+            FfxFloat32 result = outValue.r;
+            // A stored average at or above the reset sentinel (1e8) skips smoothing, so the new value is used as-is.
+            if (prev < resetAutoExposureAverageSmoothing) // prev is a log-luma average, so valid values are small or negative
+            {
+                FfxFloat32 rate = 1.0f;
+                result = prev + (result - prev) * (1 - exp(-DeltaTime() * rate)); // exponential smoothing towards the new value (presumably DeltaTime() is in seconds — TODO confirm)
+            }
+            FfxFloat32x2 spdOutput = FfxFloat32x2(ComputeAutoExposureFromLavg(result), result);
+            SPD_SetExposureBuffer(spdOutput);
+        }
+    }
+}
+
+void SpdIncreaseAtomicCounter(FfxUInt32 slice)
+{
+    SPD_IncreaseAtomicCounter(spdCounter);
+}
+
+FfxUInt32 SpdGetAtomicCounter()
+{
+    return spdCounter;
+}
+
+void SpdResetAtomicCounter(FfxUInt32 slice)
+{
+    SPD_ResetAtomicCounter();
+}
+
+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
+{
+    return FfxFloat32x4(
+        spdIntermediateR[x][y],
+        spdIntermediateG[x][y],
+        spdIntermediateB[x][y],
+        spdIntermediateA[x][y]);
+}
+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
+{
+    spdIntermediateR[x][y] = value.x;
+    spdIntermediateG[x][y] = value.y;
+    spdIntermediateB[x][y] = value.z;
+    spdIntermediateA[x][y] = value.w;
+}
+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
+{
+    return (v0 + v1 + v2 + v3) * 0.25f;
+}
+#endif
+
+// define fetch and store functions Packed
+#if FFX_HALF
+#error Callback must be implemented
+
+FFX_GROUPSHARED FfxFloat16x2 spdIntermediateRG[16][16];
+FFX_GROUPSHARED FfxFloat16x2 spdIntermediateBA[16][16];
+
+FfxFloat16x4 SpdLoadSourceImageH(FfxFloat32x2 tex, FfxUInt32 slice)
+{
+    return FfxFloat16x4(imgDst[0][FfxFloat32x3(tex, slice)]);
+}
+FfxFloat16x4 SpdLoadH(FfxInt32x2 p, FfxUInt32 slice)
+{
+    return FfxFloat16x4(imgDst6[FfxUInt32x3(p, slice)]);
+}
+void SpdStoreH(FfxInt32x2 p, FfxFloat16x4 value, FfxUInt32 mip, FfxUInt32 slice)
+{   // FP16 store callback; this whole FFX_HALF section is currently disabled by the #error at its top.
+    if (mip == LumaMipLevelToUse() || mip == 5) // same level filter as SpdStore(), applied to this function's `mip` argument (no `index` exists in this scope)
+    {
+        imgDst6[FfxUInt32x3(p, slice)] = FfxFloat32x4(value); // selected levels go to imgDst6 and return early
+        return;
+    }
+    imgDst[mip + 1][FfxUInt32x3(p, slice)] = FfxFloat32x4(value); // every other level goes to imgDst[mip + 1]
+}
+void SpdIncreaseAtomicCounter(FfxUInt32 slice)
+{
+    InterlockedAdd(rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice], 1, spdCounter);
+}
+FfxUInt32 SpdGetAtomicCounter()
+{
+    return spdCounter;
+}
+void SpdResetAtomicCounter(FfxUInt32 slice)
+{
+    rw_spd_global_atomic[FfxInt16x2(0, 0)].counter[slice] = 0;
+}
+FfxFloat16x4 SpdLoadIntermediateH(FfxUInt32 x, FfxUInt32 y)
+{
+    return FfxFloat16x4(
+        spdIntermediateRG[x][y].x,
+        spdIntermediateRG[x][y].y,
+        spdIntermediateBA[x][y].x,
+        spdIntermediateBA[x][y].y);
+}
+void SpdStoreIntermediateH(FfxUInt32 x, FfxUInt32 y, FfxFloat16x4 value)
+{
+    spdIntermediateRG[x][y] = value.xy;
+    spdIntermediateBA[x][y] = value.zw;
+}
+FfxFloat16x4 SpdReduce4H(FfxFloat16x4 v0, FfxFloat16x4 v1, FfxFloat16x4 v2, FfxFloat16x4 v3)
+{
+    return (v0 + v1 + v2 + v3) * FfxFloat16(0.25);
+}
+#endif
+
+#include "ffx_spd.h"
+
+void ComputeAutoExposure(FfxUInt32x3 WorkGroupId, FfxUInt32 LocalThreadIndex)
+{
+#if FFX_HALF
+    SpdDownsampleH(
+        FfxUInt32x2(WorkGroupId.xy),
+        FfxUInt32(LocalThreadIndex),
+        FfxUInt32(MipCount()),
+        FfxUInt32(NumWorkGroups()),
+        FfxUInt32(WorkGroupId.z),
+        FfxUInt32x2(WorkGroupOffset()));
+#else
+    SpdDownsample(
+        FfxUInt32x2(WorkGroupId.xy),
+        FfxUInt32(LocalThreadIndex),
+        FfxUInt32(MipCount()),
+        FfxUInt32(NumWorkGroups()),
+        FfxUInt32(WorkGroupId.z),
+        FfxUInt32x2(WorkGroupOffset()));
+#endif
+}
+\ No newline at end of file
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
new file mode 100644
index 0000000000..088e425452
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_compute_luminance_pyramid_pass.glsl
@@ -0,0 +1,134 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_samplerless_texture_functions : require
+
+#define FSR2_BIND_SRV_INPUT_COLOR                     0
+#define FSR2_BIND_UAV_SPD_GLOBAL_ATOMIC               1
+#define FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE        2
+#define FSR2_BIND_UAV_EXPOSURE_MIP_5                  3
+#define FSR2_BIND_UAV_AUTO_EXPOSURE                   4
+#define FSR2_BIND_CB_FSR2                             5
+#define FSR2_BIND_CB_SPD                              6
+
+#include "ffx_fsr2_callbacks_glsl.h"
+#include "ffx_fsr2_common.h"
+
+#if defined(FSR2_BIND_CB_SPD)
+	layout (set = 1, binding = FSR2_BIND_CB_SPD, std140) uniform cbSPD_t
+	{
+		uint mips;
+		uint numWorkGroups;
+		uvec2 workGroupOffset;
+		uvec2 renderSize;
+	} cbSPD;
+
+	uint MipCount()
+	{
+		return cbSPD.mips;
+	}
+
+	uint NumWorkGroups()
+	{
+		return cbSPD.numWorkGroups;
+	}
+
+	uvec2 WorkGroupOffset()
+	{
+		return cbSPD.workGroupOffset;
+	}
+
+	uvec2 SPD_RenderSize()
+	{
+		return cbSPD.renderSize;
+	}
+#endif
+
+vec2 SPD_LoadExposureBuffer()
+{
+	return imageLoad(rw_auto_exposure, ivec2(0,0)).xy;
+}
+
+void SPD_SetExposureBuffer(vec2 value)
+{
+	imageStore(rw_auto_exposure, ivec2(0,0), vec4(value, 0.0f, 0.0f));
+}
+
+vec4 SPD_LoadMipmap5(ivec2 iPxPos)
+{
+	return vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f);
+}
+
+void SPD_SetMipmap(ivec2 iPxPos, uint slice, float value)
+{
+	switch (slice) // despite its name, `slice` is compared against mip levels here (SpdStore() passes its `index`)
+	{
+	case FFX_FSR2_SHADING_CHANGE_MIP_LEVEL:
+		imageStore(rw_img_mip_shading_change, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f));
+		break;
+	case 5:
+		imageStore(rw_img_mip_5, iPxPos, vec4(value, 0.0f, 0.0f, 0.0f));
+		break;
+	default:
+		// Any other level: write back the value that is already stored, leaving the image contents unchanged.
+        // avoid flattened side effect
+#if defined(FSR2_BIND_UAV_EXPOSURE_MIP_LUMA_CHANGE)
+		imageStore(rw_img_mip_shading_change, iPxPos, vec4(imageLoad(rw_img_mip_shading_change, iPxPos).x, 0.0f, 0.0f, 0.0f));
+#elif defined(FSR2_BIND_UAV_EXPOSURE_MIP_5)
+		imageStore(rw_img_mip_5, iPxPos, vec4(imageLoad(rw_img_mip_5, iPxPos).x, 0.0f, 0.0f, 0.0f));
+#endif
+		break;
+	}
+}
+
+void SPD_IncreaseAtomicCounter(inout uint spdCounter)
+{
+	spdCounter = imageAtomicAdd(rw_spd_global_atomic, ivec2(0,0), 1); // imageAtomicAdd returns the value held before the increment
+}
+
+void SPD_ResetAtomicCounter()
+{
+	imageStore(rw_spd_global_atomic, ivec2(0,0), uvec4(0));
+}
+
+#include "ffx_fsr2_compute_luminance_pyramid.h"
+
+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#define FFX_FSR2_THREAD_GROUP_WIDTH 256
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#define FFX_FSR2_THREAD_GROUP_HEIGHT 1
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR2_NUM_THREADS
+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
+#endif // #ifndef FFX_FSR2_NUM_THREADS
+
+FFX_FSR2_NUM_THREADS
+void main()
+{
+    ComputeAutoExposure(gl_WorkGroupID.xyz, gl_LocalInvocationIndex);
+}
+\ No newline at end of file
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
new file mode 100644
index 0000000000..fa4c975a23
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip.h
@@ -0,0 +1,258 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR2_DEPTH_CLIP_H
+#define FFX_FSR2_DEPTH_CLIP_H
+
+FFX_STATIC const FfxFloat32 DepthClipBaseScale = 4.0f; // not referenced again within this header
+// Returns a factor in [0, 1] derived from comparing fCurrentDepthSample with the reconstructed previous depth around fUvSample.
+FfxFloat32 ComputeDepthClip(FfxFloat32x2 fUvSample, FfxFloat32 fCurrentDepthSample)
+{
+    FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(fCurrentDepthSample);
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fUvSample, RenderSize());
+    // NOTE: fDilatedSum is declared below but never used afterwards.
+    FfxFloat32 fDilatedSum = 0.0f;
+    FfxFloat32 fDepth = 0.0f;
+    FfxFloat32 fWeightSum = 0.0f;
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
+        // Visit the four taps (offset and weight) described by bilinearInfo.
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        const FfxInt32x2 iSamplePos = bilinearInfo.iBasePos + iOffset;
+
+        if (IsOnScreen(iSamplePos, RenderSize())) {
+            const FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+            if (fWeight > fReconstructedDepthBilinearWeightThreshold) {
+
+                const FfxFloat32 fPrevDepthSample = LoadReconstructedPrevDepth(iSamplePos);
+                const FfxFloat32 fPrevNearestDepthViewSpace = GetViewSpaceDepth(fPrevDepthSample);
+
+                const FfxFloat32 fDepthDiff = fCurrentDepthViewSpace - fPrevNearestDepthViewSpace;
+                // Only taps whose previous view-space depth is smaller than the current one contribute.
+                if (fDepthDiff > 0.0f) {
+
+#if FFX_FSR2_OPTION_INVERTED_DEPTH
+                    const FfxFloat32 fPlaneDepth = ffxMin(fPrevDepthSample, fCurrentDepthSample);
+#else
+                    const FfxFloat32 fPlaneDepth = ffxMax(fPrevDepthSample, fCurrentDepthSample);
+#endif
+                    // View-space positions of the center pixel and of pixel (0, 0) at fPlaneDepth; their length ratio is Kfov.
+                    const FfxFloat32x3 fCenter = GetViewSpacePosition(FfxInt32x2(RenderSize() * 0.5f), RenderSize(), fPlaneDepth);
+                    const FfxFloat32x3 fCorner = GetViewSpacePosition(FfxInt32x2(0, 0), RenderSize(), fPlaneDepth);
+                    // NOTE: fHalfViewportWidth holds the length of the full RenderSize() vector.
+                    const FfxFloat32 fHalfViewportWidth = length(FfxFloat32x2(RenderSize()));
+                    const FfxFloat32 fDepthThreshold = ffxMax(fCurrentDepthViewSpace, fPrevNearestDepthViewSpace);
+
+                    const FfxFloat32 Ksep = 1.37e-05f;
+                    const FfxFloat32 Kfov = length(fCorner) / length(fCenter);
+                    const FfxFloat32 fRequiredDepthSeparation = Ksep * Kfov * fHalfViewportWidth * fDepthThreshold;
+                    // The exponent moves from 1 towards 3 as the RenderSize() vector length approaches that of (1920, 1080).
+                    const FfxFloat32 fResolutionFactor = ffxSaturate(length(FfxFloat32x2(RenderSize())) / length(FfxFloat32x2(1920.0f, 1080.0f)));
+                    const FfxFloat32 fPower = ffxLerp(1.0f, 3.0f, fResolutionFactor);
+                    fDepth += ffxPow(ffxSaturate(FfxFloat32(fRequiredDepthSeparation / fDepthDiff)), fPower) * fWeight;
+                    fWeightSum += fWeight;
+                }
+            }
+        }
+    }
+    // Weighted mean of the per-tap terms, inverted and saturated; 0 when no tap contributed.
+    return (fWeightSum > 0) ? ffxSaturate(1.0f - fDepth / fWeightSum) : 0.0f;
+}
+// Measures how far the input motion vectors in the 3x3 neighborhood of iPxPos diverge from the one at iPxPos.
+FfxFloat32 ComputeMotionDivergence(FfxInt32x2 iPxPos, FfxInt32x2 iPxInputMotionVectorSize)
+{
+    FfxFloat32 minconvergence = 1.0f;
+
+    FfxFloat32x2 fMotionVectorNucleus = LoadInputMotionVector(iPxPos);
+    FfxFloat32 fNucleusVelocityLr = length(fMotionVectorNucleus * RenderSize());
+    FfxFloat32 fMaxVelocityUv = length(fMotionVectorNucleus);
+    // The neighborhood is only examined when the center motion, scaled by RenderSize(), exceeds this threshold.
+    const FfxFloat32 MotionVectorVelocityEpsilon = 1e-02f;
+
+    if (fNucleusVelocityLr > MotionVectorVelocityEpsilon) {
+        for (FfxInt32 y = -1; y <= 1; ++y) {
+            for (FfxInt32 x = -1; x <= 1; ++x) {
+
+                FfxInt32x2 sp = ClampLoad(iPxPos, FfxInt32x2(x, y), iPxInputMotionVectorSize);
+
+                FfxFloat32x2 fMotionVector = LoadInputMotionVector(sp);
+                FfxFloat32 fVelocityUv = length(fMotionVector);
+                // After the next two lines fVelocityUv equals the running maximum length, which is the divisor below.
+                fMaxVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv);
+                fVelocityUv = ffxMax(fVelocityUv, fMaxVelocityUv);
+                minconvergence = ffxMin(minconvergence, dot(fMotionVector / fVelocityUv, fMotionVectorNucleus / fVelocityUv));
+            }
+        }
+    }
+    // If the loop was skipped, minconvergence is still 1.0f and the result is 0.
+    return ffxSaturate(1.0f - minconvergence) * ffxSaturate(fMaxVelocityUv / 0.01f);
+}
+// Returns 1 - min/max of the dilated depths (converted to meters) in the 3x3 neighborhood of iPxPos.
+FfxFloat32 ComputeDepthDivergence(FfxInt32x2 iPxPos)
+{
+    const FfxFloat32 fMaxDistInMeters = GetMaxDistanceInMeters();
+    FfxFloat32 fDepthMax = 0.0f;
+    FfxFloat32 fDepthMin = fMaxDistInMeters;
+
+    FfxInt32 iMaxDistFound = 0;
+
+    for (FfxInt32 y = -1; y < 2; y++) {
+        for (FfxInt32 x = -1; x < 2; x++) {
+
+            const FfxInt32x2 iOffset = FfxInt32x2(x, y);
+            const FfxInt32x2 iSamplePos = iPxPos + iOffset;
+            // Off-screen samples are multiplied by 0, so they enter the min/max as a depth of 0.
+            const FfxFloat32 fOnScreenFactor = IsOnScreen(iSamplePos, RenderSize()) ? 1.0f : 0.0f;
+            FfxFloat32 fDepth = GetViewSpaceDepthInMeters(LoadDilatedDepth(iSamplePos)) * fOnScreenFactor;
+            // Remember whether any sample equals the maximum distance exactly.
+            iMaxDistFound |= FfxInt32(fMaxDistInMeters == fDepth);
+
+            fDepthMin = ffxMin(fDepthMin, fDepth);
+            fDepthMax = ffxMax(fDepthMax, fDepth);
+        }
+    }
+    // The result is forced to 0 when any sample was at the maximum distance.
+    return (1.0f - fDepthMin / fDepthMax) * (FfxBoolean(iMaxDistFound) ? 0.0f : 1.0f);
+}
+// Compares the dilated motion vector at iPxPos with the previous frame's vector sampled at the reprojected UV.
+FfxFloat32 ComputeTemporalMotionDivergence(FfxInt32x2 iPxPos)
+{
+    const FfxFloat32x2 fCurrentMv = LoadDilatedMotionVector(iPxPos);
+    FfxFloat32x2 fHistoryUv = FfxFloat32x2(iPxPos + 0.5f) / RenderSize() + fCurrentMv;
+    fHistoryUv = ClampUv(fHistoryUv, RenderSize(), MaxRenderSize());
+    const FfxFloat32x2 fHistoryMv = SamplePreviousDilatedMotionVector(fHistoryUv);
+    const FfxFloat32 fDisplayPxDistance = length(fCurrentMv * DisplaySize());
+    if (fDisplayPxDistance > 1.0f) {
+        return ffxLerp(0.0f, 1.0f - ffxSaturate(length(fHistoryMv) / length(fCurrentMv)), ffxSaturate(ffxPow(fDisplayPxDistance / 20.0f, 3.0f)));
+    }
+    return 0.0f; // motion of at most one display pixel yields no divergence
+}
+// Dilates the reactive and transparency/composition masks over the 3x3 neighborhood of iPxLrPos and stores the result.
+void PreProcessReactiveMasks(FfxInt32x2 iPxLrPos, FfxFloat32 fMotionDivergence)
+{
+    // Compensate for bilinear sampling in accumulation pass
+    // fReactiveFactor.x starts at 0 and .y starts at the motion divergence supplied by the caller.
+    FfxFloat32x3 fReferenceColor = LoadInputColor(iPxLrPos).xyz;
+    FfxFloat32x2 fReactiveFactor = FfxFloat32x2(0.0f, fMotionDivergence);
+
+    float fMasksSum = 0.0f;
+    // The nine neighborhood samples are cached here and consumed by the second loop.
+    FfxFloat32x3 fColorSamples[9];
+    FfxFloat32 fReactiveSamples[9];
+    FfxFloat32 fTransparencyAndCompositionSamples[9];
+
+    FFX_UNROLL
+    for (FfxInt32 y = -1; y < 2; y++) {
+        FFX_UNROLL
+        for (FfxInt32 x = -1; x < 2; x++) {
+
+            const FfxInt32x2 sampleCoord = ClampLoad(iPxLrPos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));
+            // Maps (x, y) in [-1, 1] to a row-major index 0..8.
+            FfxInt32 sampleIdx = (y + 1) * 3 + x + 1;
+
+            FfxFloat32x3 fColorSample = LoadInputColor(sampleCoord).xyz;
+            FfxFloat32 fReactiveSample = LoadReactiveMask(sampleCoord);
+            FfxFloat32 fTransparencyAndCompositionSample = LoadTransparencyAndCompositionMask(sampleCoord);
+
+            fColorSamples[sampleIdx] = fColorSample;
+            fReactiveSamples[sampleIdx] = fReactiveSample;
+            fTransparencyAndCompositionSamples[sampleIdx] = fTransparencyAndCompositionSample;
+
+            fMasksSum += (fReactiveSample + fTransparencyAndCompositionSample);
+        }
+    }
+    // The weighting loop is skipped when the summed mask values are not positive.
+    if (fMasksSum > 0)
+    {
+        for (FfxInt32 sampleIdx = 0; sampleIdx < 9; sampleIdx++)
+        {
+            FfxFloat32x3 fColorSample = fColorSamples[sampleIdx];
+            FfxFloat32 fReactiveSample = fReactiveSamples[sampleIdx];
+            FfxFloat32 fTransparencyAndCompositionSample = fTransparencyAndCompositionSamples[sampleIdx];
+            // Similarity is the dot product of the two colors divided by the larger of their squared lengths.
+            const FfxFloat32 fMaxLenSq = ffxMax(dot(fReferenceColor, fReferenceColor), dot(fColorSample, fColorSample));
+            const FfxFloat32 fSimilarity = dot(fReferenceColor, fColorSample) / fMaxLenSq;
+
+            // Increase power for non-similar samples
+            const FfxFloat32 fPowerBiasMax = 6.0f;
+            const FfxFloat32 fSimilarityPower = 1.0f + (fPowerBiasMax - fSimilarity * fPowerBiasMax);
+            const FfxFloat32 fWeightedReactiveSample = ffxPow(fReactiveSample, fSimilarityPower);
+            const FfxFloat32 fWeightedTransparencyAndCompositionSample = ffxPow(fTransparencyAndCompositionSample, fSimilarityPower);
+            // Keep the per-component maximum over all samples.
+            fReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32x2(fWeightedReactiveSample, fWeightedTransparencyAndCompositionSample));
+        }
+    }
+
+    StoreDilatedReactiveMasks(iPxLrPos, fReactiveFactor);
+}
+// Loads the input color at iPxLrPos, runs it through PrepareRgb() and returns the RGBToYCoCg() conversion.
+FfxFloat32x3 ComputePreparedInputColor(FfxInt32x2 iPxLrPos)
+{
+    // Linear input data is assumed. Non-linear input (sRGB, ...) should be
+    // converted to linear beforehand and back to sRGB on output.
+    FfxFloat32x3 fClampedRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos));
+
+    // Negative channel values were clamped to zero above, before the exposure values are applied.
+    FfxFloat32x3 fExposedRgb = PrepareRgb(fClampedRgb, Exposure(), PreExposure());
+
+    // The prepared color is handed back in YCoCg form.
+    return RGBToYCoCg(fExposedRgb);
+}
+// Returns 0 when the depth at y-1 exceeds the depth at y, and the depth at y exceeds the depth at y+1, each by more than 1%; otherwise 1. fMotionVector is unused.
+FfxFloat32 EvaluateSurface(FfxInt32x2 iPxPos, FfxFloat32x2 fMotionVector)
+{
+    const FfxFloat32 fDepthYMinus1 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, -1)));
+    const FfxFloat32 fDepthY = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 0)));
+    const FfxFloat32 fDepthYPlus1 = GetViewSpaceDepth(LoadReconstructedPrevDepth(iPxPos + FfxInt32x2(0, 1)));
+    const FfxBoolean bSteepSlope = ((fDepthYMinus1 - fDepthY) > (fDepthY * 0.01f)) && ((fDepthY - fDepthYPlus1) > (fDepthYPlus1 * 0.01f));
+    return bSteepSlope ? 0.0f : 1.0f;
+}
+// Per-pixel body of the depth clip pass: stores the prepared color with its depth clip factor, then the dilated reactive masks.
+void DepthClip(FfxInt32x2 iPxPos)
+{
+    FfxFloat32x2 fDepthUv = (iPxPos + 0.5f) / RenderSize();
+    FfxFloat32x2 fMotionVector = LoadDilatedMotionVector(iPxPos);
+
+    // Discard tiny mvs: vectors no longer than 0.01 display pixels are zeroed
+    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.01f);
+
+    const FfxFloat32x2 fDilatedUv = fDepthUv + fMotionVector;
+    const FfxFloat32 fDilatedDepth = LoadDilatedDepth(iPxPos);
+    const FfxFloat32 fCurrentDepthViewSpace = GetViewSpaceDepth(LoadInputDepth(iPxPos));
+    // NOTE: fCurrentDepthViewSpace above is not read anywhere else in this function.
+    // Compute prepared input color and depth clip
+    FfxFloat32 fDepthClip = ComputeDepthClip(fDilatedUv, fDilatedDepth) * EvaluateSurface(iPxPos, fMotionVector);
+    FfxFloat32x3 fPreparedYCoCg = ComputePreparedInputColor(iPxPos);
+    StorePreparedInputColor(iPxPos, FfxFloat32x4(fPreparedYCoCg, fDepthClip));
+    // The depth clip factor travels in the fourth component next to the YCoCg color.
+    // Compute dilated reactive mask
+#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+    FfxInt32x2 iSamplePos = iPxPos;
+#else
+    FfxInt32x2 iSamplePos = ComputeHrPosFromLrPos(iPxPos);
+#endif
+    // NOTE(review): RenderSize() is the clamp size in both branches, also when iSamplePos comes from ComputeHrPosFromLrPos() - confirm.
+    FfxFloat32 fMotionDivergence = ComputeMotionDivergence(iSamplePos, RenderSize());
+    FfxFloat32 fTemporalMotionDifference = ffxSaturate(ComputeTemporalMotionDivergence(iPxPos) - ComputeDepthDivergence(iPxPos));
+    // The larger of the two divergence terms is what PreProcessReactiveMasks() receives.
+    PreProcessReactiveMasks(iPxPos, ffxMax(fTemporalMotionDifference, fMotionDivergence));
+}
+
+#endif //!defined( FFX_FSR2_DEPTH_CLIP_H )
+\ No newline at end of file
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl
new file mode 100644
index 0000000000..65cc8b67ef
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_depth_clip_pass.glsl
@@ -0,0 +1,67 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_samplerless_texture_functions : require
+// Binding slots for this pass: SRV macros use 0-10, UAV macros 11-13 and the FSR2 constant buffer 14.
+#define FSR2_BIND_SRV_RECONSTRUCTED_PREV_NEAREST_DEPTH      0
+#define FSR2_BIND_SRV_DILATED_MOTION_VECTORS                1
+#define FSR2_BIND_SRV_DILATED_DEPTH                         2
+#define FSR2_BIND_SRV_REACTIVE_MASK                         3
+#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK     4
+#define FSR2_BIND_SRV_PREPARED_INPUT_COLOR                  5
+#define FSR2_BIND_SRV_PREVIOUS_DILATED_MOTION_VECTORS       6
+#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS                  7
+#define FSR2_BIND_SRV_INPUT_COLOR                           8
+#define FSR2_BIND_SRV_INPUT_DEPTH                           9
+#define FSR2_BIND_SRV_INPUT_EXPOSURE                        10
+
+#define FSR2_BIND_UAV_DEPTH_CLIP                            11
+#define FSR2_BIND_UAV_DILATED_REACTIVE_MASKS                12
+#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR                  13
+
+#define FSR2_BIND_CB_FSR2                                   14
+
+#include "ffx_fsr2_callbacks_glsl.h"
+#include "ffx_fsr2_common.h"
+#include "ffx_fsr2_sample.h"
+#include "ffx_fsr2_depth_clip.h"
+// Default work-group size for this pass is 8x8x1; each macro can be overridden by defining it beforehand.
+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR2_NUM_THREADS
+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
+#endif // #ifndef FFX_FSR2_NUM_THREADS
+// Entry point: passes the invocation's global xy ID to DepthClip() as the pixel position.
+FFX_FSR2_NUM_THREADS
+void main()
+{
+	DepthClip(ivec2(gl_GlobalInvocationID.xy));
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h
new file mode 100644
index 0000000000..3bd4d5d912
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_begin.h
@@ -0,0 +1 @@
+// This file doesn't exist in this version of FSR.
+\ No newline at end of file
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h
new file mode 100644
index 0000000000..3bd4d5d912
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_force16_end.h
@@ -0,0 +1 @@
+// This file doesn't exist in this version of FSR.
+\ No newline at end of file
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h
new file mode 100644
index 0000000000..8347fa86bc
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock.h
@@ -0,0 +1,115 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR2_LOCK_H
+#define FFX_FSR2_LOCK_H
+// Writes the far-plane depth bits at iPxHrPos when both coordinates are below RenderSize().
+void ClearResourcesForNextFrame(in FfxInt32x2 iPxHrPos)
+{
+#if FFX_FSR2_OPTION_INVERTED_DEPTH
+    const FfxUInt32 uFarDepthBits = 0x0;
+#else
+    const FfxUInt32 uFarDepthBits = 0x3f800000; // bit pattern of 1.0f
+#endif
+    const FfxBoolean bBelowRenderSize = all(FFX_LESS_THAN(iPxHrPos, FfxInt32x2(RenderSize())));
+    if (bBelowRenderSize) {
+        SetReconstructedDepth(iPxHrPos, uFarDepthBits);
+    }
+}
+// Returns true when the luma at 'pos' stands out as a thin feature (ridge) within its 3x3 neighborhood.
+FfxBoolean ComputeThinFeatureConfidence(FfxInt32x2 pos)
+{
+    const FfxInt32 RADIUS = 1;
+
+    FfxFloat32 fNucleus = LoadLockInputLuma(pos);
+    // A neighbor counts as similar when its max/min luma ratio against the center is below this threshold.
+    FfxFloat32 similar_threshold = 1.05f;
+    FfxFloat32 dissimilarLumaMin = FSR2_FLT_MAX;
+    FfxFloat32 dissimilarLumaMax = 0;
+    // Bit index used in 'mask' for each position of the 3x3 neighborhood:
+    /*
+     0 1 2
+     3 4 5
+     6 7 8
+    */
+    // The macro argument is parenthesized so that expression arguments expand as intended.
+    #define SETBIT(x) (1U << (x))
+
+    FfxUInt32 mask = SETBIT(4); //flag fNucleus as similar
+    // Each rejection mask covers one 2x2 quad of the neighborhood; every quad includes the center (bit 4).
+    const FfxUInt32 uNumRejectionMasks = 4;
+    const FfxUInt32 uRejectionMasks[uNumRejectionMasks] = {
+        SETBIT(0) | SETBIT(1) | SETBIT(3) | SETBIT(4), //Upper left
+        SETBIT(1) | SETBIT(2) | SETBIT(4) | SETBIT(5), //Upper right
+        SETBIT(3) | SETBIT(4) | SETBIT(6) | SETBIT(7), //Lower left
+        SETBIT(4) | SETBIT(5) | SETBIT(7) | SETBIT(8), //Lower right
+    };
+    // idx advances in the inner loop's increment, so it also counts the skipped center position.
+    FfxInt32 idx = 0;
+    FFX_UNROLL
+    for (FfxInt32 y = -RADIUS; y <= RADIUS; y++) {
+        FFX_UNROLL
+        for (FfxInt32 x = -RADIUS; x <= RADIUS; x++, idx++) {
+            if (x == 0 && y == 0) continue;
+
+            FfxInt32x2 samplePos = ClampLoad(pos, FfxInt32x2(x, y), FfxInt32x2(RenderSize()));
+
+            FfxFloat32 sampleLuma = LoadLockInputLuma(samplePos);
+            FfxFloat32 difference = ffxMax(sampleLuma, fNucleus) / ffxMin(sampleLuma, fNucleus);
+
+            if (difference > 0 && (difference < similar_threshold)) {
+                mask |= SETBIT(idx);
+            } else {
+                dissimilarLumaMin = ffxMin(dissimilarLumaMin, sampleLuma);
+                dissimilarLumaMax = ffxMax(dissimilarLumaMax, sampleLuma);
+            }
+        }
+    }
+    // The center is a ridge when it lies above the largest or below the smallest dissimilar neighbor luma.
+    FfxBoolean isRidge = fNucleus > dissimilarLumaMax || fNucleus < dissimilarLumaMin;
+
+    if (FFX_FALSE == isRidge) {
+
+        return false;
+    }
+    // Reject when every pixel of any one of the four 2x2 quads is flagged as similar.
+    FFX_UNROLL
+    for (FfxInt32 i = 0; i < 4; i++) {
+
+        if ((mask & uRejectionMasks[i]) == uRejectionMasks[i]) {
+            return false;
+        }
+    }
+
+    return true;
+}
+// Stores a new lock for iPxLrPos when ComputeThinFeatureConfidence() accepts it, then calls ClearResourcesForNextFrame().
+void ComputeLock(FfxInt32x2 iPxLrPos)
+{
+    const FfxBoolean bThinFeature = ComputeThinFeatureConfidence(iPxLrPos);
+    if (bThinFeature) {
+        StoreNewLocks(ComputeHrPosFromLrPos(iPxLrPos), 1.f);
+    }
+
+    ClearResourcesForNextFrame(iPxLrPos);
+}
+
+#endif // FFX_FSR2_LOCK_H
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl
new file mode 100644
index 0000000000..0adce1bb11
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_lock_pass.glsl
@@ -0,0 +1,56 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_samplerless_texture_functions : require
+// Binding slots for this pass: one SRV macro (0), two UAV macros (1-2) and the FSR2 constant buffer (3).
+#define FSR2_BIND_SRV_LOCK_INPUT_LUMA                       0
+#define FSR2_BIND_UAV_NEW_LOCKS                             1
+#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH      2
+#define FSR2_BIND_CB_FSR2                                   3
+
+#include "ffx_fsr2_callbacks_glsl.h"
+#include "ffx_fsr2_common.h"
+#include "ffx_fsr2_sample.h"
+#include "ffx_fsr2_lock.h"
+// Default work-group size for this pass is 8x8x1; each macro can be overridden by defining it beforehand.
+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR2_NUM_THREADS
+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
+#endif // #ifndef FFX_FSR2_NUM_THREADS
+// Entry point: derives the pixel position from the work-group and local invocation IDs and runs ComputeLock().
+FFX_FSR2_NUM_THREADS
+void main()
+{
+    uvec2 uDispatchThreadId = gl_WorkGroupID.xy * uvec2(FFX_FSR2_THREAD_GROUP_WIDTH, FFX_FSR2_THREAD_GROUP_HEIGHT) + gl_LocalInvocationID.xy;
+    // Matches gl_GlobalInvocationID.xy as long as the layout is built from these same width/height macros.
+    ComputeLock(ivec2(uDispatchThreadId));
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h
new file mode 100644
index 0000000000..cee9e148ba
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_postprocess_lock_status.h
@@ -0,0 +1,106 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR2_POSTPROCESS_LOCK_STATUS_H
+#define FFX_FSR2_POSTPROCESS_LOCK_STATUS_H
+// Loads the luma mip value at iPxSample (mip level from LumaMipLevelToUse()) into .x of a four-component vector; other components are 0.
+FfxFloat32x4 WrapShadingChangeLuma(FfxInt32x2 iPxSample)
+{
+    return FfxFloat32x4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
+}
+// Min16 overload of the same wrapper, compiled only when FFX_HALF is enabled.
+#if FFX_HALF
+FFX_MIN16_F4 WrapShadingChangeLuma(FFX_MIN16_I2 iPxSample)
+{
+    return FFX_MIN16_F4(LoadMipLuma(iPxSample, LumaMipLevelToUse()), 0, 0, 0);
+}
+#endif
+// Declares FetchShadingChangeLumaSamples (variant chosen by the options below) and ShadingChangeLumaSample on top of the wrapper.
+#if FFX_FSR2_OPTION_POSTPROCESSLOCKSTATUS_SAMPLERS_USE_DATA_HALF && FFX_HALF
+DeclareCustomFetchBilinearSamplesMin16(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
+#else
+DeclareCustomFetchBicubicSamples(FetchShadingChangeLumaSamples, WrapShadingChangeLuma)
+#endif
+DeclareCustomTextureSample(ShadingChangeLumaSample, Lanczos2, FetchShadingChangeLumaSamples)
+// Samples the luma mip at fUvCoord and returns (Exposure() * exp(sample)) raised to the power 1/6. iPxHrPos is unused.
+FfxFloat32 GetShadingChangeLuma(FfxInt32x2 iPxHrPos, FfxFloat32x2 fUvCoord)
+{
+    FfxFloat32 fShadingChangeLuma = 0;
+    // The '#if 0' branch is compiled out; only the '#else' path is active.
+#if 0
+    fShadingChangeLuma = Exposure() * exp(ShadingChangeLumaSample(fUvCoord, LumaMipDimensions()).x);
+#else
+    // fDiv is 2 << mipLevel, i.e. 2^(mipLevel + 1).
+    const FfxFloat32 fDiv = FfxFloat32(2 << LumaMipLevelToUse());
+    FfxInt32x2 iMipRenderSize = FfxInt32x2(RenderSize() / fDiv);
+    // Clamp the UV against the mip-sized render area before sampling.
+    fUvCoord = ClampUv(fUvCoord, iMipRenderSize, LumaMipDimensions());
+    fShadingChangeLuma = Exposure() * exp(FfxFloat32(SampleMipLuma(fUvCoord, LumaMipLevelToUse())));
+#endif
+
+    fShadingChangeLuma = ffxPow(fShadingChangeLuma, 1.0f / 6.0f);
+
+    return fShadingChangeLuma;
+}
+// Updates fLockStatus for the current pixel and outputs this frame's lock contribution and the luminance difference.
+void UpdateLockStatus(AccumulationPassCommonParams params,
+    FFX_PARAMETER_INOUT FfxFloat32 fReactiveFactor, LockState state,
+    FFX_PARAMETER_INOUT FfxFloat32x2 fLockStatus,
+    FFX_PARAMETER_OUT FfxFloat32 fLockContributionThisFrame,
+    FFX_PARAMETER_OUT FfxFloat32 fLuminanceDiff) {
+    // fReactiveFactor can only be raised here (ffxMax below); fLockStatus is modified in place.
+    const FfxFloat32 fShadingChangeLuma = GetShadingChangeLuma(params.iPxHrPos, params.fHrUv);
+
+    // Init the temporal shading change factor while it is still 0. TODO: init to -1 or so in reproject to know if "true new"?
+    fLockStatus[LOCK_TEMPORAL_LUMA] = (fLockStatus[LOCK_TEMPORAL_LUMA] == FfxFloat32(0.0f)) ? fShadingChangeLuma : fLockStatus[LOCK_TEMPORAL_LUMA];
+
+    FfxFloat32 fPreviousShadingChangeLuma = fLockStatus[LOCK_TEMPORAL_LUMA];
+    // fLuminanceDiff is 1 minus MinDividedByMax() of the stored and the current luma.
+    fLuminanceDiff = 1.0f - MinDividedByMax(fPreviousShadingChangeLuma, fShadingChangeLuma);
+    // New lock: adopt the current luma; the lifetime becomes 2 if it was non-zero before, else 1.
+    if (state.NewLock) {
+        fLockStatus[LOCK_TEMPORAL_LUMA] = fShadingChangeLuma;
+
+        fLockStatus[LOCK_LIFETIME_REMAINING] = (fLockStatus[LOCK_LIFETIME_REMAINING] != 0.0f) ? 2.0f : 1.0f;
+    }
+    else if(fLockStatus[LOCK_LIFETIME_REMAINING] <= 1.0f) {
+        fLockStatus[LOCK_TEMPORAL_LUMA] = ffxLerp(fLockStatus[LOCK_TEMPORAL_LUMA], FfxFloat32(fShadingChangeLuma), 0.5f);
+    }
+    else {
+        if (fLuminanceDiff > 0.1f) {
+            KillLock(fLockStatus);
+        }
+    }
+    // A luminance difference above 0.1 starts raising the reactive factor; the term reaches 1 at a difference of 0.2.
+    fReactiveFactor = ffxMax(fReactiveFactor, ffxSaturate((fLuminanceDiff - 0.1f) * 10.0f));
+    fLockStatus[LOCK_LIFETIME_REMAINING] *= (1.0f - fReactiveFactor);
+    // The lifetime is further scaled by the accumulation mask and zeroed unless the depth clip factor is below 0.1.
+    fLockStatus[LOCK_LIFETIME_REMAINING] *= ffxSaturate(1.0f - params.fAccumulationMask);
+    fLockStatus[LOCK_LIFETIME_REMAINING] *= FfxFloat32(params.fDepthClipFactor < 0.1f);
+
+    // Compute this frame lock contribution
+    const FfxFloat32 fLifetimeContribution = ffxSaturate(fLockStatus[LOCK_LIFETIME_REMAINING] - 1.0f);
+    const FfxFloat32 fShadingChangeContribution = ffxSaturate(MinDividedByMax(fLockStatus[LOCK_TEMPORAL_LUMA], fShadingChangeLuma));
+
+    fLockContributionThisFrame = ffxSaturate(ffxSaturate(fLifetimeContribution * 4.0f) * fShadingChangeContribution);
+}
+
+#endif //!defined( FFX_FSR2_POSTPROCESS_LOCK_STATUS_H )
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h
new file mode 100644
index 0000000000..d9006cd8ee
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas.h
@@ -0,0 +1,67 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+// Macros defined ahead of the ffx_fsr1.h include further down in this header.
+#define GROUP_SIZE  8
+
+#define FSR_RCAS_DENOISE 1
+// Stores fUpscaledColor at iPxHrPos after converting the unsigned position to its signed counterpart.
+void WriteUpscaledOutput(FFX_MIN16_U2 iPxHrPos, FfxFloat32x3 fUpscaledColor)
+{
+    StoreUpscaledOutput(FFX_MIN16_I2(iPxHrPos), fUpscaledColor);
+}
+// FSR_RCAS_F and the two functions below are defined before ffx_fsr1.h is included (presumably its RCAS callbacks - confirm).
+#define FSR_RCAS_F
+FfxFloat32x4 FsrRcasLoadF(FfxInt32x2 p)
+{
+    FfxFloat32x4 fColor = LoadRCAS_Input(p);
+    // Only the color channels go through PrepareRgb(); .a is returned as loaded.
+    fColor.rgb = PrepareRgb(fColor.rgb, Exposure(), PreExposure());
+
+    return fColor;
+}
+// Empty body: the r/g/b arguments are left untouched.
+void FsrRcasInputF(inout FfxFloat32 r, inout FfxFloat32 g, inout FfxFloat32 b) {}
+
+#include "ffx_fsr1.h"
+
+// Runs FsrRcasF() for the pixel at 'pos', passes the result through UnprepareRgb() and writes it to the upscaled output.
+void CurrFilter(FFX_MIN16_U2 pos)
+{
+    FfxFloat32x3 c;
+    FsrRcasF(c.r, c.g, c.b, pos, RCASConfig());
+    // c is presumably filled through FsrRcasF's first three arguments (out parameters) - confirm against ffx_fsr1.h.
+    c = UnprepareRgb(c, Exposure());
+
+    WriteUpscaledOutput(pos, c);
+}
+// Filters four pixels per invocation: a base position plus its +8 offsets in x, in x and y, and in y. Only LocalThreadId.x and WorkGroupId.xy are read.
+void RCAS(FfxUInt32x3 LocalThreadId, FfxUInt32x3 WorkGroupId, FfxUInt32x3 Dtid)
+{
+    // Remap the local x index within the work group for a more PS-like swizzle pattern,
+    // then offset it by the work group's origin (work-group ID shifted left by 4, i.e. times 16).
+    const FfxUInt32x2 uGroupOrigin = FfxUInt32x2(WorkGroupId.x << 4u, WorkGroupId.y << 4u);
+    const FfxUInt32x2 uBasePx = ffxRemapForQuad(LocalThreadId.x) + uGroupOrigin;
+
+    CurrFilter(FFX_MIN16_U2(uBasePx));
+    CurrFilter(FFX_MIN16_U2(uBasePx + FfxUInt32x2(8u, 0u)));
+    CurrFilter(FFX_MIN16_U2(uBasePx + FfxUInt32x2(8u, 8u)));
+    CurrFilter(FFX_MIN16_U2(uBasePx + FfxUInt32x2(0u, 8u)));
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl
new file mode 100644
index 0000000000..f78fa53e6e
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_rcas_pass.glsl
@@ -0,0 +1,80 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_samplerless_texture_functions : require
+// Needed for rw_upscaled_output declaration
+#extension GL_EXT_shader_image_load_formatted : require
+
+#define FSR2_BIND_SRV_INPUT_EXPOSURE        0
+#define FSR2_BIND_SRV_RCAS_INPUT            1
+#define FSR2_BIND_UAV_UPSCALED_OUTPUT       2
+#define FSR2_BIND_CB_FSR2                   3
+#define FSR2_BIND_CB_RCAS                   4
+
+#include "ffx_fsr2_callbacks_glsl.h"
+#include "ffx_fsr2_common.h"
+
+//Move to prototype shader!
+#if defined(FSR2_BIND_CB_RCAS) // FSR2_BIND_CB_RCAS is defined earlier in this file, so this branch is taken unless an include undefines it
+    layout (set = 1, binding = FSR2_BIND_CB_RCAS, std140) uniform cbRCAS_t
+    {
+        uvec4 rcasConfig;
+    } cbRCAS;
+
+    uvec4 RCASConfig() // returns the rcasConfig member of the cbRCAS uniform block
+    {
+        return cbRCAS.rcasConfig;
+    }
+#else
+    uvec4 RCASConfig() // fallback when the RCAS constant buffer binding is not defined: all-zero configuration
+    {
+        return uvec4(0);
+    }
+#endif
+
+vec4 LoadRCAS_Input(FfxInt32x2 iPxPos)
+{
+    return texelFetch(r_rcas_input, iPxPos, 0); // unfiltered fetch of mip level 0; r_rcas_input is declared outside this file
+}
+
+#include "ffx_fsr2_rcas.h"
+
+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH // the defaults below yield a 64x1x1 workgroup unless the macros were defined before this point
+#define FFX_FSR2_THREAD_GROUP_WIDTH 64
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#define FFX_FSR2_THREAD_GROUP_HEIGHT 1
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR2_NUM_THREADS
+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
+#endif // #ifndef FFX_FSR2_NUM_THREADS
+
+FFX_FSR2_NUM_THREADS
+void main()
+{
+    RCAS(gl_LocalInvocationID.xyz, gl_WorkGroupID.xyz, gl_GlobalInvocationID.xyz); // entry point: forwards the local, workgroup and global invocation IDs to RCAS()
+}
+\ No newline at end of file
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
new file mode 100644
index 0000000000..e9ccc4bc8c
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h
@@ -0,0 +1,145 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
+#define FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H
+
+void ReconstructPrevDepth(FfxInt32x2 iPxPos, FfxFloat32 fDepth, FfxFloat32x2 fMotionVector, FfxInt32x2 iPxDepthSize)
+{
+    fMotionVector *= FfxFloat32(length(fMotionVector * DisplaySize()) > 0.1f); // zero the motion vector unless its length scaled by DisplaySize() exceeds 0.1
+
+    FfxFloat32x2 fUv = (iPxPos + FfxFloat32(0.5)) / iPxDepthSize; // UV of the centre of pixel iPxPos within iPxDepthSize
+    FfxFloat32x2 fReprojectedUv = fUv + fMotionVector;
+ 
+    BilinearSamplingData bilinearInfo = GetBilinearSamplingData(fReprojectedUv, RenderSize());
+
+    // Project current depth into previous frame locations.
+    // Push to all pixels having some contribution if reprojection is using bilinear logic.
+    for (FfxInt32 iSampleIndex = 0; iSampleIndex < 4; iSampleIndex++) {
+        
+        const FfxInt32x2 iOffset = bilinearInfo.iOffsets[iSampleIndex];
+        FfxFloat32 fWeight = bilinearInfo.fWeights[iSampleIndex];
+
+        if (fWeight > fReconstructedDepthBilinearWeightThreshold) { // skip taps whose bilinear weight does not exceed the threshold
+
+            FfxInt32x2 iStorePos = bilinearInfo.iBasePos + iOffset;
+            if (IsOnScreen(iStorePos, iPxDepthSize)) { // only positions reported on-screen for iPxDepthSize are written
+                StoreReconstructedDepth(iStorePos, fDepth);
+            }
+        }
+    }
+}
+
+void FindNearestDepth(FFX_PARAMETER_IN FfxInt32x2 iPxPos, FFX_PARAMETER_IN FfxInt32x2 iPxSize, FFX_PARAMETER_OUT FfxFloat32 fNearestDepth, FFX_PARAMETER_OUT FfxInt32x2 fNearestDepthCoord)
+{
+    const FfxInt32 iSampleCount = 9;
+    const FfxInt32x2 iSampleOffsets[iSampleCount] = {
+        FfxInt32x2(+0, +0),
+        FfxInt32x2(+1, +0),
+        FfxInt32x2(+0, +1),
+        FfxInt32x2(+0, -1),
+        FfxInt32x2(-1, +0),
+        FfxInt32x2(-1, +1),
+        FfxInt32x2(+1, +1),
+        FfxInt32x2(-1, -1),
+        FfxInt32x2(+1, -1),
+    };
+    // The scratch array is sized by iSampleCount so it cannot drift from the offset table.
+    // pull out the depth loads to allow SC to batch them
+    FfxFloat32 depth[iSampleCount];
+    FfxInt32 iSampleIndex = 0;
+    FFX_UNROLL
+    for (iSampleIndex = 0; iSampleIndex < iSampleCount; ++iSampleIndex) {
+        // Loads are issued for every tap; on-screen filtering happens in the second loop.
+        FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
+        depth[iSampleIndex] = LoadInputDepth(iPos);
+    }
+    // The centre tap (index 0) seeds the result without an on-screen test.
+    // find closest depth
+    fNearestDepthCoord = iPxPos;
+    fNearestDepth = depth[0];
+    FFX_UNROLL
+    for (iSampleIndex = 1; iSampleIndex < iSampleCount; ++iSampleIndex) {
+        // Neighbouring taps only compete when IsOnScreen accepts them for iPxSize.
+        FfxInt32x2 iPos = iPxPos + iSampleOffsets[iSampleIndex];
+        if (IsOnScreen(iPos, iPxSize)) {
+
+            FfxFloat32 fNdDepth = depth[iSampleIndex];
+#if FFX_FSR2_OPTION_INVERTED_DEPTH
+            if (fNdDepth > fNearestDepth) {
+#else
+            if (fNdDepth < fNearestDepth) {
+#endif
+                fNearestDepthCoord = iPos;
+                fNearestDepth = fNdDepth;
+            }
+        }
+    }
+}
+
+FfxFloat32 ComputeLockInputLuma(FfxInt32x2 iPxLrPos)
+{
+    // We assume linear data. If the input is non-linear (sRGB, ...),
+    // it should be converted to linear first and back to sRGB on output.
+    FfxFloat32x3 fRgb = ffxMax(FfxFloat32x3(0, 0, 0), LoadInputColor(iPxLrPos)); // clamp negative channels to zero
+
+    // Use internal auto exposure for locking logic
+    fRgb /= PreExposure();
+    fRgb *= Exposure();
+
+#if FFX_FSR2_OPTION_HDR_COLOR_INPUT
+    fRgb = Tonemap(fRgb);
+#endif
+
+    // Compute the luma used to lock pixels; if it is used elsewhere the ffxPow must be moved!
+    const FfxFloat32 fLockInputLuma = ffxPow(RGBToPerceivedLuma(fRgb), FfxFloat32(1.0 / 6.0)); // sixth root of the perceived luma
+
+    return fLockInputLuma;
+}
+
+void ReconstructAndDilate(FfxInt32x2 iPxLrPos)
+{
+    FfxFloat32 fDilatedDepth;
+    FfxInt32x2 iNearestDepthCoord;
+    // Depth and its coordinate come from the nearest tap in the 3x3 neighbourhood of iPxLrPos.
+    FindNearestDepth(iPxLrPos, RenderSize(), fDilatedDepth, iNearestDepthCoord);
+
+#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+    // With low-resolution motion vectors the nearest-depth coordinate is used as-is.
+    FfxInt32x2 iMotionVectorPos = iNearestDepthCoord;
+#else
+    // Otherwise the nearest-depth coordinate is mapped through ComputeHrPosFromLrPos first.
+    FfxInt32x2 iMotionVectorPos = ComputeHrPosFromLrPos(iNearestDepthCoord);
+#endif
+    // The motion vector is read at the nearest-depth position, not at iPxLrPos itself.
+    FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iMotionVectorPos);
+
+    StoreDilatedDepth(iPxLrPos, fDilatedDepth);
+    StoreDilatedMotionVector(iPxLrPos, fDilatedMotionVector);
+
+    ReconstructPrevDepth(iPxLrPos, fDilatedDepth, fDilatedMotionVector, RenderSize());
+
+    FfxFloat32 fLockInputLuma = ComputeLockInputLuma(iPxLrPos);
+    StoreLockInputLuma(iPxLrPos, fLockInputLuma);
+}
+
+
+#endif //!defined( FFX_FSR2_RECONSTRUCT_DILATED_VELOCITY_AND_PREVIOUS_DEPTH_H )
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
new file mode 100644
index 0000000000..25c18c0622
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reconstruct_previous_depth_pass.glsl
@@ -0,0 +1,65 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_samplerless_texture_functions : require
+
+#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS                  0
+#define FSR2_BIND_SRV_INPUT_DEPTH                           1
+#define FSR2_BIND_SRV_INPUT_COLOR                           2
+#define FSR2_BIND_SRV_INPUT_EXPOSURE                        3
+#define FSR2_BIND_SRV_LUMA_HISTORY                          4
+
+#define FSR2_BIND_UAV_RECONSTRUCTED_PREV_NEAREST_DEPTH      5
+#define FSR2_BIND_UAV_DILATED_MOTION_VECTORS                6
+#define FSR2_BIND_UAV_DILATED_DEPTH                         7
+#define FSR2_BIND_UAV_PREPARED_INPUT_COLOR                  8
+#define FSR2_BIND_UAV_LUMA_HISTORY                          9
+#define FSR2_BIND_UAV_LUMA_INSTABILITY                      10
+#define FSR2_BIND_UAV_LOCK_INPUT_LUMA                       11
+
+#define FSR2_BIND_CB_FSR2                                   12
+
+#include "ffx_fsr2_callbacks_glsl.h"
+#include "ffx_fsr2_common.h"
+#include "ffx_fsr2_sample.h"
+#include "ffx_fsr2_reconstruct_dilated_velocity_and_previous_depth.h"
+
+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH // the defaults below yield an 8x8x1 workgroup unless the macros were defined before this point
+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR2_NUM_THREADS
+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
+#endif // #ifndef FFX_FSR2_NUM_THREADS
+
+FFX_FSR2_NUM_THREADS
+void main()
+{
+	ReconstructAndDilate(FFX_MIN16_I2(gl_GlobalInvocationID.xy)); // entry point: one invocation per pixel, addressed by the global invocation xy
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h
new file mode 100644
index 0000000000..f7f396129e
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_reproject.h
@@ -0,0 +1,136 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR2_REPROJECT_H
+#define FFX_FSR2_REPROJECT_H
+
+#ifndef FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE
+#define FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE 0 // Reference
+#endif
+
+FfxFloat32x4 WrapHistory(FfxInt32x2 iPxSample)
+{
+    return LoadHistory(iPxSample); // thin adapter so the sampler-declaration macros can be handed a fetch function by name
+}
+
+#if FFX_HALF
+FFX_MIN16_F4 WrapHistory(FFX_MIN16_I2 iPxSample)
+{
+    return FFX_MIN16_F4(LoadHistory(iPxSample)); // FFX_HALF overload: same load, result converted to FFX_MIN16_F4
+}
+#endif
+
+
+#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF // Min16 declarations are used only when both options are enabled
+DeclareCustomFetchBicubicSamplesMin16(FetchHistorySamples, WrapHistory)
+DeclareCustomTextureSampleMin16(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
+#else
+DeclareCustomFetchBicubicSamples(FetchHistorySamples, WrapHistory)
+DeclareCustomTextureSample(HistorySample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchHistorySamples)
+#endif
+
+FfxFloat32x4 WrapLockStatus(FfxInt32x2 iPxSample)
+{
+    // Widen the loaded lock status to four components by zero-filling the trailing two.
+    return FfxFloat32x4(LoadLockStatus(iPxSample), 0.0f, 0.0f);
+}
+
+#if FFX_HALF
+FFX_MIN16_F4 WrapLockStatus(FFX_MIN16_I2 iPxSample)
+{
+    // FFX_HALF overload: widen the loaded lock status to four components,
+    // zero-filling the trailing two.
+    return FFX_MIN16_F4(LoadLockStatus(iPxSample), 0.0, 0.0);
+}
+#endif
+
+#if 1 // bilinear lock-status sampling is the active path; the bicubic variant in the #else branch below is compiled out
+#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
+DeclareCustomFetchBilinearSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
+DeclareCustomTextureSampleMin16(LockStatusSample, Bilinear, FetchLockStatusSamples)
+#else
+DeclareCustomFetchBilinearSamples(FetchLockStatusSamples, WrapLockStatus)
+DeclareCustomTextureSample(LockStatusSample, Bilinear, FetchLockStatusSamples)
+#endif
+#else
+#if FFX_FSR2_OPTION_REPROJECT_SAMPLERS_USE_DATA_HALF && FFX_HALF
+DeclareCustomFetchBicubicSamplesMin16(FetchLockStatusSamples, WrapLockStatus)
+DeclareCustomTextureSampleMin16(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
+#else
+DeclareCustomFetchBicubicSamples(FetchLockStatusSamples, WrapLockStatus)
+DeclareCustomTextureSample(LockStatusSample, FFX_FSR2_GET_LANCZOS_SAMPLER1D(FFX_FSR2_OPTION_REPROJECT_USE_LANCZOS_TYPE), FetchLockStatusSamples)
+#endif
+#endif
+
+FfxFloat32x2 GetMotionVector(FfxInt32x2 iPxHrPos, FfxFloat32x2 fHrUv)
+{
+#if FFX_FSR2_OPTION_LOW_RESOLUTION_MOTION_VECTORS
+    FfxFloat32x2 fDilatedMotionVector = LoadDilatedMotionVector(FFX_MIN16_I2(fHrUv * RenderSize())); // UV scaled by RenderSize() selects the dilated-vector texel
+#else
+    FfxFloat32x2 fDilatedMotionVector = LoadInputMotionVector(iPxHrPos); // otherwise the input motion vector is read directly at iPxHrPos
+#endif
+
+    return fDilatedMotionVector;
+}
+
+FfxBoolean IsUvInside(FfxFloat32x2 fUv)
+{
+    return (fUv.x >= 0.0f && fUv.x <= 1.0f) && (fUv.y >= 0.0f && fUv.y <= 1.0f); // true when both components lie in [0, 1], edges included
+}
+
+void ComputeReprojectedUVs(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedHrUv, FFX_PARAMETER_OUT FfxBoolean bIsExistingSample)
+{
+    fReprojectedHrUv = params.fHrUv + params.fMotionVector; // current UV offset by the motion vector
+
+    bIsExistingSample = IsUvInside(fReprojectedHrUv); // set only when the reprojected UV stays inside [0, 1] on both axes
+}
+
+void ReprojectHistoryColor(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x3 fHistoryColor, FFX_PARAMETER_OUT FfxFloat32 fTemporalReactiveFactor, FFX_PARAMETER_OUT FfxBoolean bInMotionLastFrame)
+{
+    FfxFloat32x4 fHistory = HistorySample(params.fReprojectedHrUv, DisplaySize());
+
+    fHistoryColor = PrepareRgb(fHistory.rgb, Exposure(), PreviousFramePreExposure()); // uses the previous frame's pre-exposure, not the current one
+
+    fHistoryColor = RGBToYCoCg(fHistoryColor); // the output colour is returned in YCoCg
+
+    // Compute temporal reactivity info: history.w carries the factor in its magnitude and the motion flag in its sign.
+    fTemporalReactiveFactor = ffxSaturate(abs(fHistory.w));
+    bInMotionLastFrame = (fHistory.w < 0.0f);
+}
+
+LockState ReprojectHistoryLockStatus(const AccumulationPassCommonParams params, FFX_PARAMETER_OUT FfxFloat32x2 fReprojectedLockStatus)
+{
+    LockState state = { FFX_FALSE, FFX_FALSE };
+    // A new lock is flagged when the stored intensity at this pixel exceeds 127/255.
+    const FfxFloat32 fNewLockIntensity = LoadRwNewLocks(params.iPxHrPos);
+    state.NewLock = fNewLockIntensity > (127.0f / 255.0f);
+
+    // Lock status is sampled at the reprojected UV and handed back to the caller.
+    fReprojectedLockStatus = SampleLockStatus(params.fReprojectedHrUv);
+    // Any non-zero remaining lifetime marks the pixel as locked in the previous frame.
+    if (fReprojectedLockStatus[LOCK_LIFETIME_REMAINING] != FfxFloat32(0.0f)) {
+        state.WasLockedPrevFrame = true;
+    }
+
+    return state;
+}
+
+#endif //!defined( FFX_FSR2_REPROJECT_H )
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h
new file mode 100644
index 0000000000..535dbc383c
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_resources.h
@@ -0,0 +1,105 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR2_RESOURCES_H
+#define FFX_FSR2_RESOURCES_H
+
+#if defined(FFX_CPU) || defined(FFX_GPU)
+#define FFX_FSR2_RESOURCE_IDENTIFIER_NULL                                           0
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_OPAQUE_ONLY                              1
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_COLOR                                    2
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_MOTION_VECTORS                           3
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_DEPTH                                    4
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_EXPOSURE                                 5
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_REACTIVE_MASK                            6
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INPUT_TRANSPARENCY_AND_COMPOSITION_MASK        7
+#define FFX_FSR2_RESOURCE_IDENTIFIER_RECONSTRUCTED_PREVIOUS_NEAREST_DEPTH           8
+#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_MOTION_VECTORS                         9
+#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_DEPTH                                  10
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR                        11
+#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS                                    12
+#define FFX_FSR2_RESOURCE_IDENTIFIER_NEW_LOCKS                                      13
+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREPARED_INPUT_COLOR                           14
+#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY                                   15
+#define FFX_FSR2_RESOURCE_IDENTIFIER_DEBUG_OUTPUT                                   16
+#define FFX_FSR2_RESOURCE_IDENTIFIER_LANCZOS_LUT                                    17
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SPD_ATOMIC_COUNT                               18
+#define FFX_FSR2_RESOURCE_IDENTIFIER_UPSCALED_OUTPUT                                19
+#define FFX_FSR2_RESOURCE_IDENTIFIER_RCAS_INPUT                                     20
+#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_1                                  21
+#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_STATUS_2                                  22
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_1                      23
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_UPSCALED_COLOR_2                      24
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_REACTIVITY                    25
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_TRANSPARENCY_AND_COMPOSITION  26
+#define FFX_FSR2_RESOURCE_IDENTITIER_UPSAMPLE_MAXIMUM_BIAS_LUT                      27 // NOTE(review): "IDENTITIER" is misspelled; presumably referenced under this name elsewhere, so a rename needs a coordinated change
+#define FFX_FSR2_RESOURCE_IDENTIFIER_DILATED_REACTIVE_MASKS                         28
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE                                29 // same as FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0                       29
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_1                       30
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_2                       31
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_3                       32
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4                       33
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_5                       34
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_6                       35
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_7                       36
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_8                       37
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_9                       38
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_10                      39
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_11                      40
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12                      41
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DEFAULT_EXPOSURE                      42
+#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTO_EXPOSURE                                  43
+#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOREACTIVE                                   44
+#define FFX_FSR2_RESOURCE_IDENTIFIER_AUTOCOMPOSITION                                45
+
+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR                           46
+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR                          47
+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_1                         48
+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_1                        49
+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_PRE_ALPHA_COLOR_2                         50
+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREV_POST_ALPHA_COLOR_2                        51
+#define FFX_FSR2_RESOURCE_IDENTIFIER_PREVIOUS_DILATED_MOTION_VECTORS                52
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_1              53
+#define FFX_FSR2_RESOURCE_IDENTIFIER_INTERNAL_DILATED_MOTION_VECTORS_2              54
+#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_1                                 55
+#define FFX_FSR2_RESOURCE_IDENTIFIER_LUMA_HISTORY_2                                 56
+#define FFX_FSR2_RESOURCE_IDENTIFIER_LOCK_INPUT_LUMA                                57
+
+// Shading change detection mip level setting, value must be in the range [FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_0, FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_12]
+#define FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE          FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_4
+#define FFX_FSR2_SHADING_CHANGE_MIP_LEVEL                                           (FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE_MIPMAP_SHADING_CHANGE - FFX_FSR2_RESOURCE_IDENTIFIER_SCENE_LUMINANCE)
+
+#define FFX_FSR2_RESOURCE_IDENTIFIER_COUNT                                          58 // one past the highest identifier defined above (57); keep in sync when adding identifiers
+
+#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_FSR2                                     0
+#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_SPD                                      1
+#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_RCAS                                     2
+#define FFX_FSR2_CONSTANTBUFFER_IDENTIFIER_GENREACTIVE                              3
+
+#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_TONEMAP                                    1
+#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_INVERSETONEMAP                             2
+#define FFX_FSR2_AUTOREACTIVEFLAGS_APPLY_THRESHOLD                                  4
+#define FFX_FSR2_AUTOREACTIVEFLAGS_USE_COMPONENTS_MAX                               8
+
+#endif // #if defined(FFX_CPU) || defined(FFX_GPU)
+
+#endif //!defined( FFX_FSR2_RESOURCES_H )
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
new file mode 100644
index 0000000000..f94f40aa79
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_sample.h
@@ -0,0 +1,605 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR2_SAMPLE_H
+#define FFX_FSR2_SAMPLE_H
+
+// suppress warnings
+#ifdef FFX_HLSL
+#pragma warning(disable: 4008) // potentially divide by zero
+#endif //FFX_HLSL
+
+struct FetchedBilinearSamples {
+    // 2x2 sample footprint; in fColorXY the first digit is the x offset and the second the y offset (see Bilinear()).
+    FfxFloat32x4 fColor00;
+    FfxFloat32x4 fColor10;
+
+    FfxFloat32x4 fColor01;
+    FfxFloat32x4 fColor11;
+};
+
+struct FetchedBicubicSamples {
+    // 4x4 sample footprint; presumably fColorXY follows the bilinear struct's naming (x offset, then y offset) - TODO confirm against the fetch macros.
+    FfxFloat32x4 fColor00;
+    FfxFloat32x4 fColor10;
+    FfxFloat32x4 fColor20;
+    FfxFloat32x4 fColor30;
+
+    FfxFloat32x4 fColor01;
+    FfxFloat32x4 fColor11;
+    FfxFloat32x4 fColor21;
+    FfxFloat32x4 fColor31;
+
+    FfxFloat32x4 fColor02;
+    FfxFloat32x4 fColor12;
+    FfxFloat32x4 fColor22;
+    FfxFloat32x4 fColor32;
+
+    FfxFloat32x4 fColor03;
+    FfxFloat32x4 fColor13;
+    FfxFloat32x4 fColor23;
+    FfxFloat32x4 fColor33;
+};
+
+#if FFX_HALF
+struct FetchedBilinearSamplesMin16 {
+    // FFX_HALF counterpart of FetchedBilinearSamples: same four members, stored as FFX_MIN16_F4.
+    FFX_MIN16_F4 fColor00;
+    FFX_MIN16_F4 fColor10;
+
+    FFX_MIN16_F4 fColor01;
+    FFX_MIN16_F4 fColor11;
+};
+
+struct FetchedBicubicSamplesMin16 {
+    // FFX_HALF counterpart of FetchedBicubicSamples: same sixteen members, stored as FFX_MIN16_F4.
+    FFX_MIN16_F4 fColor00;
+    FFX_MIN16_F4 fColor10;
+    FFX_MIN16_F4 fColor20;
+    FFX_MIN16_F4 fColor30;
+
+    FFX_MIN16_F4 fColor01;
+    FFX_MIN16_F4 fColor11;
+    FFX_MIN16_F4 fColor21;
+    FFX_MIN16_F4 fColor31;
+
+    FFX_MIN16_F4 fColor02;
+    FFX_MIN16_F4 fColor12;
+    FFX_MIN16_F4 fColor22;
+    FFX_MIN16_F4 fColor32;
+
+    FFX_MIN16_F4 fColor03;
+    FFX_MIN16_F4 fColor13;
+    FFX_MIN16_F4 fColor23;
+    FFX_MIN16_F4 fColor33;
+};
+#else //FFX_HALF
+#define FetchedBicubicSamplesMin16 FetchedBicubicSamples
+#define FetchedBilinearSamplesMin16 FetchedBilinearSamples
+#endif //FFX_HALF
+
+FfxFloat32x4 Linear(FfxFloat32x4 A, FfxFloat32x4 B, FfxFloat32 t)
+{
+    return A + (B - A) * t; // linear interpolation: A at t = 0, B at t = 1; t is not clamped
+}
+
+FfxFloat32x4 Bilinear(FetchedBilinearSamples BilinearSamples, FfxFloat32x2 fPxFrac)
+{
+    // Interpolate along x within each row, then along y between the two rows.
+    const FfxFloat32x4 fRow0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x);
+    const FfxFloat32x4 fRow1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x);
+    return Linear(fRow0, fRow1, fPxFrac.y);
+}
+
+#if FFX_HALF
+FFX_MIN16_F4 Linear(FFX_MIN16_F4 A, FFX_MIN16_F4 B, FFX_MIN16_F t)
+{
+    return A + (B - A) * t; // FFX_HALF overload of the linear interpolation: A at t = 0, B at t = 1; t is not clamped
+}
+
+FFX_MIN16_F4 Bilinear(FetchedBilinearSamplesMin16 BilinearSamples, FFX_MIN16_F2 fPxFrac)
+{
+    FFX_MIN16_F4 fColorX0 = Linear(BilinearSamples.fColor00, BilinearSamples.fColor10, fPxFrac.x); // row y = 0, interpolated along x
+    FFX_MIN16_F4 fColorX1 = Linear(BilinearSamples.fColor01, BilinearSamples.fColor11, fPxFrac.x); // row y = 1, interpolated along x
+    FFX_MIN16_F4 fColorXY = Linear(fColorX0, fColorX1, fPxFrac.y); // blend the two rows along y
+    return fColorXY;
+}
+#endif
+
+FfxFloat32 Lanczos2NoClamp(FfxFloat32 x)
+{
+    const FfxFloat32 PI = 3.141592653589793f; // TODO: share SDK constants
+    return abs(x) < FSR2_EPSILON ? 1.f : (sin(PI * x) / (PI * x)) * (sin(0.5f * PI * x) / (0.5f * PI * x)); // sinc(x) * sinc(x / 2); returns 1 near x = 0 to avoid dividing by zero
+}
+
+FfxFloat32 Lanczos2(FfxFloat32 x)
+{
+    // Limit |x| to at most 2 before evaluating the unclamped kernel.
+    return Lanczos2NoClamp(ffxMin(abs(x), 2.0f));
+}
+
+#if FFX_HALF
+
+#if 0 // compiled out: no FFX_MIN16_F overload of Lanczos2NoClamp is defined in this section
+FFX_MIN16_F Lanczos2NoClamp(FFX_MIN16_F x)
+{
+    const FFX_MIN16_F PI = FFX_MIN16_F(3.141592653589793f); // TODO: share SDK constants
+    return abs(x) < FFX_MIN16_F(FSR2_EPSILON) ? FFX_MIN16_F(1.f) : (sin(PI * x) / (PI * x)) * (sin(FFX_MIN16_F(0.5f) * PI * x) / (FFX_MIN16_F(0.5f) * PI * x));
+}
+#endif
+
+FFX_MIN16_F Lanczos2(FFX_MIN16_F x)
+{
+    x = ffxMin(abs(x), FFX_MIN16_F(2.0f)); // limit |x| to at most 2
+    return FFX_MIN16_F(Lanczos2NoClamp(x)); // NOTE(review): the Min16 Lanczos2NoClamp above is under #if 0, so this presumably resolves to the FfxFloat32 overload - confirm
+}
+#endif //FFX_HALF
+
+// FSR1 lanczos approximation. Input is x*x and must be <= 4.
+FfxFloat32 Lanczos2ApproxSqNoClamp(FfxFloat32 x2)
+{
+    FfxFloat32 a = (2.0f / 5.0f) * x2 - 1;
+    FfxFloat32 b = (1.0f / 4.0f) * x2 - 1; // b is 0 at x2 = 4, so the result is 0 there
+    return ((25.0f / 16.0f) * a * a - (25.0f / 16.0f - 1)) * (b * b); // evaluates to 1 at x2 = 0
+}
+
+#if FFX_HALF
+FFX_MIN16_F Lanczos2ApproxSqNoClamp(FFX_MIN16_F x2)
+{
+    FFX_MIN16_F a = FFX_MIN16_F(2.0f / 5.0f) * x2 - FFX_MIN16_F(1);
+    FFX_MIN16_F b = FFX_MIN16_F(1.0f / 4.0f) * x2 - FFX_MIN16_F(1); // b is 0 at x2 = 4, so the result is 0 there
+    return (FFX_MIN16_F(25.0f / 16.0f) * a * a - FFX_MIN16_F(25.0f / 16.0f - 1)) * (b * b); // same polynomial as the FfxFloat32 overload, in FFX_MIN16_F
+}
+#endif //FFX_HALF
+
+FfxFloat32 Lanczos2ApproxSq(FfxFloat32 x2)
+{
+    // Limit the squared distance to at most 4 before evaluating the approximation.
+    return Lanczos2ApproxSqNoClamp(ffxMin(x2, 4.0f));
+}
+
+#if FFX_HALF
+FFX_MIN16_F Lanczos2ApproxSq(FFX_MIN16_F x2)
+{   // Caps the squared distance at 4, then evaluates the unclamped approximation.
+    x2 = ffxMin(x2, FFX_MIN16_F(4.0f));
+    return Lanczos2ApproxSqNoClamp(x2);
+}
+#endif //FFX_HALF
+
+FfxFloat32 Lanczos2ApproxNoClamp(FfxFloat32 x)
+{   // Squares x and evaluates the approximation without capping; the callee expects x * x <= 4.
+    return Lanczos2ApproxSqNoClamp(x * x);
+}
+
+#if FFX_HALF
+FFX_MIN16_F Lanczos2ApproxNoClamp(FFX_MIN16_F x)
+{   // FFX_MIN16_F variant: squares x and evaluates the approximation without capping (x * x must be <= 4).
+    return Lanczos2ApproxSqNoClamp(x * x);
+}
+#endif //FFX_HALF
+
+FfxFloat32 Lanczos2Approx(FfxFloat32 x)
+{   // Squares x and evaluates the approximation through the variant that caps x * x at 4.
+    return Lanczos2ApproxSq(x * x);
+}
+
+#if FFX_HALF
+FFX_MIN16_F Lanczos2Approx(FFX_MIN16_F x)
+{   // FFX_MIN16_F variant: squares x and evaluates the approximation through the variant that caps x * x at 4.
+    return Lanczos2ApproxSq(x * x);
+}
+#endif //FFX_HALF
+
+FfxFloat32 Lanczos2_UseLUT(FfxFloat32 x)
+{   // Returns the weight for |x| as provided by SampleLanczos2Weight (defined outside this section).
+    return SampleLanczos2Weight(abs(x));
+}
+
+#if FFX_HALF
+FFX_MIN16_F Lanczos2_UseLUT(FFX_MIN16_F x)
+{   // FFX_MIN16_F variant: weight for |x| from SampleLanczos2Weight, converted to FFX_MIN16_F.
+    return FFX_MIN16_F(SampleLanczos2Weight(abs(x)));
+}
+#endif //FFX_HALF
+
+FfxFloat32x4 Lanczos2_UseLUT(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
+{   // Weighted average of four taps; each weight is the LUT kernel at (tap offset - t) for tap offsets -1, 0, +1, +2.
+    FfxFloat32 fWeight0 = Lanczos2_UseLUT(-1.f - t);
+    FfxFloat32 fWeight1 = Lanczos2_UseLUT(-0.f - t);
+    FfxFloat32 fWeight2 = Lanczos2_UseLUT(+1.f - t);
+    FfxFloat32 fWeight3 = Lanczos2_UseLUT(+2.f - t);
+    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); // normalised by the weight sum
+}
+#if FFX_HALF
+FFX_MIN16_F4 Lanczos2_UseLUT(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
+{   // FFX_MIN16_F variant: weighted average of four taps, weights from the LUT kernel at (tap offset - t), offsets -1..+2.
+    FFX_MIN16_F fWeight0 = Lanczos2_UseLUT(FFX_MIN16_F(-1.f) - t);
+    FFX_MIN16_F fWeight1 = Lanczos2_UseLUT(FFX_MIN16_F(-0.f) - t);
+    FFX_MIN16_F fWeight2 = Lanczos2_UseLUT(FFX_MIN16_F(+1.f) - t);
+    FFX_MIN16_F fWeight3 = Lanczos2_UseLUT(FFX_MIN16_F(+2.f) - t);
+    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); // normalised by the weight sum
+}
+#endif
+
+FfxFloat32x4 Lanczos2(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
+{   // Weighted average of four taps; each weight is Lanczos2 evaluated at (tap offset - t) for tap offsets -1, 0, +1, +2.
+    FfxFloat32 fWeight0 = Lanczos2(-1.f - t);
+    FfxFloat32 fWeight1 = Lanczos2(-0.f - t);
+    FfxFloat32 fWeight2 = Lanczos2(+1.f - t);
+    FfxFloat32 fWeight3 = Lanczos2(+2.f - t);
+    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); // normalised by the weight sum
+}
+
+FfxFloat32x4 Lanczos2(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+    FfxFloat32x4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FfxFloat32x4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FfxFloat32x4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FfxFloat32x4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FfxFloat32x4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
+
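+    // Deringing: clamp the filtered result to the min/max of the four innermost samples (fColor11, fColor21, fColor12, fColor22).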
+
+    // TODO: only use 4 by checking jitter
+    const FfxInt32 iDeringingSampleCount = 4;
+    const FfxFloat32x4 fDeringingSamples[4] = {
+        Samples.fColor11,
+        Samples.fColor21,
+        Samples.fColor12,
+        Samples.fColor22,
+    };
+
+    FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
+    FfxFloat32x4 fDeringingMax = fDeringingSamples[0];
+
+    FFX_UNROLL
+    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) {
+
+        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
+        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
+    }
+
+    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
+
+    return fColorXY;
+}
+
+#if FFX_HALF
+FFX_MIN16_F4 Lanczos2(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
+{   // FFX_MIN16_F variant: weighted average of four taps, weights from Lanczos2 at (tap offset - t), offsets -1..+2.
+    FFX_MIN16_F fWeight0 = Lanczos2(FFX_MIN16_F(-1.f) - t);
+    FFX_MIN16_F fWeight1 = Lanczos2(FFX_MIN16_F(-0.f) - t);
+    FFX_MIN16_F fWeight2 = Lanczos2(FFX_MIN16_F(+1.f) - t);
+    FFX_MIN16_F fWeight3 = Lanczos2(FFX_MIN16_F(+2.f) - t);
+    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); // normalised by the weight sum
+}
+
+FFX_MIN16_F4 Lanczos2(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
+{
+    FFX_MIN16_F4 fColorX0 = Lanczos2(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FFX_MIN16_F4 fColorX1 = Lanczos2(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FFX_MIN16_F4 fColorX2 = Lanczos2(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FFX_MIN16_F4 fColorX3 = Lanczos2(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FFX_MIN16_F4 fColorXY = Lanczos2(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
+
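+    // Deringing: clamp the filtered result to the min/max of the four innermost samples (fColor11, fColor21, fColor12, fColor22).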
+
+    // TODO: only use 4 by checking jitter
+    const FfxInt32 iDeringingSampleCount = 4;
+    const FFX_MIN16_F4 fDeringingSamples[4] = {
+        Samples.fColor11,
+        Samples.fColor21,
+        Samples.fColor12,
+        Samples.fColor22,
+    };
+
+    FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
+    FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];
+
+    FFX_UNROLL
+    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
+    {
+        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
+        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
+    }
+
+    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
+
+    return fColorXY;
+}
+#endif //FFX_HALF
+
+
+FfxFloat32x4 Lanczos2LUT(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+    FfxFloat32x4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FfxFloat32x4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FfxFloat32x4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FfxFloat32x4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FfxFloat32x4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
+
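+    // Deringing: clamp the filtered result to the min/max of the four innermost samples (fColor11, fColor21, fColor12, fColor22).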
+
+    // TODO: only use 4 by checking jitter
+    const FfxInt32 iDeringingSampleCount = 4;
+    const FfxFloat32x4 fDeringingSamples[4] = {
+        Samples.fColor11,
+        Samples.fColor21,
+        Samples.fColor12,
+        Samples.fColor22,
+    };
+
+    FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
+    FfxFloat32x4 fDeringingMax = fDeringingSamples[0];
+
+    FFX_UNROLL
+    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex) {
+
+        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
+        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
+    }
+
+    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
+
+    return fColorXY;
+}
+
+#if FFX_HALF
+FFX_MIN16_F4 Lanczos2LUT(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
+{
+    FFX_MIN16_F4 fColorX0 = Lanczos2_UseLUT(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FFX_MIN16_F4 fColorX1 = Lanczos2_UseLUT(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FFX_MIN16_F4 fColorX2 = Lanczos2_UseLUT(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FFX_MIN16_F4 fColorX3 = Lanczos2_UseLUT(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FFX_MIN16_F4 fColorXY = Lanczos2_UseLUT(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
+
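+    // Deringing: clamp the filtered result to the min/max of the four innermost samples (fColor11, fColor21, fColor12, fColor22).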
+
+    // TODO: only use 4 by checking jitter
+    const FfxInt32 iDeringingSampleCount = 4;
+    const FFX_MIN16_F4 fDeringingSamples[4] = {
+        Samples.fColor11,
+        Samples.fColor21,
+        Samples.fColor12,
+        Samples.fColor22,
+    };
+
+    FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
+    FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];
+
+    FFX_UNROLL
+    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
+    {
+        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
+        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
+    }
+
+    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
+
+    return fColorXY;
+}
+#endif //FFX_HALF
+
+
+
+FfxFloat32x4 Lanczos2Approx(FfxFloat32x4 fColor0, FfxFloat32x4 fColor1, FfxFloat32x4 fColor2, FfxFloat32x4 fColor3, FfxFloat32 t)
+{   // Weighted average of four taps using the unclamped approximation; assumes t keeps every argument within [-2, 2] - TODO confirm.
+    FfxFloat32 fWeight0 = Lanczos2ApproxNoClamp(-1.f - t);
+    FfxFloat32 fWeight1 = Lanczos2ApproxNoClamp(-0.f - t);
+    FfxFloat32 fWeight2 = Lanczos2ApproxNoClamp(+1.f - t);
+    FfxFloat32 fWeight3 = Lanczos2ApproxNoClamp(+2.f - t);
+    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); // normalised by the weight sum
+}
+
+#if FFX_HALF
+FFX_MIN16_F4 Lanczos2Approx(FFX_MIN16_F4 fColor0, FFX_MIN16_F4 fColor1, FFX_MIN16_F4 fColor2, FFX_MIN16_F4 fColor3, FFX_MIN16_F t)
+{   // FFX_MIN16_F variant using the unclamped approximation; assumes t keeps every argument within [-2, 2] - TODO confirm.
+    FFX_MIN16_F fWeight0 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-1.f) - t);
+    FFX_MIN16_F fWeight1 = Lanczos2ApproxNoClamp(FFX_MIN16_F(-0.f) - t);
+    FFX_MIN16_F fWeight2 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+1.f) - t);
+    FFX_MIN16_F fWeight3 = Lanczos2ApproxNoClamp(FFX_MIN16_F(+2.f) - t);
+    return (fWeight0 * fColor0 + fWeight1 * fColor1 + fWeight2 * fColor2 + fWeight3 * fColor3) / (fWeight0 + fWeight1 + fWeight2 + fWeight3); // normalised by the weight sum
+}
+#endif //FFX_HALF
+
+FfxFloat32x4 Lanczos2Approx(FetchedBicubicSamples Samples, FfxFloat32x2 fPxFrac)
+{
+    FfxFloat32x4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FfxFloat32x4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FfxFloat32x4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FfxFloat32x4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FfxFloat32x4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
+
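+    // Deringing: clamp the filtered result to the min/max of the four innermost samples (fColor11, fColor21, fColor12, fColor22).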
+
+    // TODO: only use 4 by checking jitter
+    const FfxInt32 iDeringingSampleCount = 4;
+    const FfxFloat32x4 fDeringingSamples[4] = {
+        Samples.fColor11,
+        Samples.fColor21,
+        Samples.fColor12,
+        Samples.fColor22,
+    };
+
+    FfxFloat32x4 fDeringingMin = fDeringingSamples[0];
+    FfxFloat32x4 fDeringingMax = fDeringingSamples[0];
+
+    FFX_UNROLL
+    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
+    {
+        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
+        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
+    }
+
+    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
+
+    return fColorXY;
+}
+
+#if FFX_HALF
+FFX_MIN16_F4 Lanczos2Approx(FetchedBicubicSamplesMin16 Samples, FFX_MIN16_F2 fPxFrac)
+{
+    FFX_MIN16_F4 fColorX0 = Lanczos2Approx(Samples.fColor00, Samples.fColor10, Samples.fColor20, Samples.fColor30, fPxFrac.x);
+    FFX_MIN16_F4 fColorX1 = Lanczos2Approx(Samples.fColor01, Samples.fColor11, Samples.fColor21, Samples.fColor31, fPxFrac.x);
+    FFX_MIN16_F4 fColorX2 = Lanczos2Approx(Samples.fColor02, Samples.fColor12, Samples.fColor22, Samples.fColor32, fPxFrac.x);
+    FFX_MIN16_F4 fColorX3 = Lanczos2Approx(Samples.fColor03, Samples.fColor13, Samples.fColor23, Samples.fColor33, fPxFrac.x);
+    FFX_MIN16_F4 fColorXY = Lanczos2Approx(fColorX0, fColorX1, fColorX2, fColorX3, fPxFrac.y);
+
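+    // Deringing: clamp the filtered result to the min/max of the four innermost samples (fColor11, fColor21, fColor12, fColor22).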
+
+    // TODO: only use 4 by checking jitter
+    const FfxInt32 iDeringingSampleCount = 4;
+    const FFX_MIN16_F4 fDeringingSamples[4] = {
+        Samples.fColor11,
+        Samples.fColor21,
+        Samples.fColor12,
+        Samples.fColor22,
+    };
+
+    FFX_MIN16_F4 fDeringingMin = fDeringingSamples[0];
+    FFX_MIN16_F4 fDeringingMax = fDeringingSamples[0];
+
+    FFX_UNROLL
+    for (FfxInt32 iSampleIndex = 1; iSampleIndex < iDeringingSampleCount; ++iSampleIndex)
+    {
+        fDeringingMin = ffxMin(fDeringingMin, fDeringingSamples[iSampleIndex]);
+        fDeringingMax = ffxMax(fDeringingMax, fDeringingSamples[iSampleIndex]);
+    }
+
+    fColorXY = clamp(fColorXY, fDeringingMin, fDeringingMax);
+
+    return fColorXY;
+}
+#endif
+
+// Clamp by offset direction. Assuming iPxSample is already in range and iPxOffset is compile time constant.
+FfxInt32x2 ClampCoord(FfxInt32x2 iPxSample, FfxInt32x2 iPxOffset, FfxInt32x2 iTextureSize)
+{   // Returns iPxSample + iPxOffset, clamped per axis only on the side the offset points to; a zero offset is not clamped.
+    FfxInt32x2 result = iPxSample + iPxOffset;
+    result.x = (iPxOffset.x < 0) ? ffxMax(result.x, 0) : result.x; // negative offset: keep x >= 0
+    result.x = (iPxOffset.x > 0) ? ffxMin(result.x, iTextureSize.x - 1) : result.x; // positive offset: keep x <= width - 1
+    result.y = (iPxOffset.y < 0) ? ffxMax(result.y, 0) : result.y; // negative offset: keep y >= 0
+    result.y = (iPxOffset.y > 0) ? ffxMin(result.y, iTextureSize.y - 1) : result.y; // positive offset: keep y <= height - 1
+    return result;
+}
+#if FFX_HALF
+FFX_MIN16_I2 ClampCoord(FFX_MIN16_I2 iPxSample, FFX_MIN16_I2 iPxOffset, FFX_MIN16_I2 iTextureSize)
+{   // FFX_MIN16_I2 variant: iPxSample + iPxOffset, clamped per axis only on the side the offset points to.
+    FFX_MIN16_I2 result = iPxSample + iPxOffset;
+    result.x = (iPxOffset.x < FFX_MIN16_I(0)) ? ffxMax(result.x, FFX_MIN16_I(0)) : result.x; // negative offset: keep x >= 0
+    result.x = (iPxOffset.x > FFX_MIN16_I(0)) ? ffxMin(result.x, iTextureSize.x - FFX_MIN16_I(1)) : result.x; // positive offset: keep x <= width - 1
+    result.y = (iPxOffset.y < FFX_MIN16_I(0)) ? ffxMax(result.y, FFX_MIN16_I(0)) : result.y; // negative offset: keep y >= 0
+    result.y = (iPxOffset.y > FFX_MIN16_I(0)) ? ffxMin(result.y, iTextureSize.y - FFX_MIN16_I(1)) : result.y; // positive offset: keep y <= height - 1
+    return result;
+}
+#endif //FFX_HALF
+
+
+#define DeclareCustomFetchBicubicSamplesWithType(SampleType, TextureType, AddrType, Name, LoadTexture)               \
+    SampleType Name(AddrType iPxSample, AddrType iTextureSize)                                          \
+    {   /* Loads a 4x4 block around iPxSample; fColorXY holds the texel at offset (X - 1, Y - 1). */    \
+        SampleType Samples;                                                                             \
+        /* Row at y offset -1 */                                                                        \
+        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, -1), iTextureSize)));    \
+        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, -1), iTextureSize)));    \
+        Samples.fColor20 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, -1), iTextureSize)));    \
+        Samples.fColor30 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, -1), iTextureSize)));    \
+        /* Row at y offset 0 */                                                                         \
+        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +0), iTextureSize)));    \
+        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize)));    \
+        Samples.fColor21 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize)));    \
+        Samples.fColor31 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +0), iTextureSize)));    \
+        /* Row at y offset +1 */                                                                        \
+        Samples.fColor02 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +1), iTextureSize)));    \
+        Samples.fColor12 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize)));    \
+        Samples.fColor22 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize)));    \
+        Samples.fColor32 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +1), iTextureSize)));    \
+        /* Row at y offset +2 */                                                                        \
+        Samples.fColor03 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(-1, +2), iTextureSize)));    \
+        Samples.fColor13 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +2), iTextureSize)));    \
+        Samples.fColor23 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +2), iTextureSize)));    \
+        Samples.fColor33 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+2, +2), iTextureSize)));    \
+                                                                                                        \
+        return Samples;                                                                                 \
+    }
+
+#define DeclareCustomFetchBicubicSamples(Name, LoadTexture)                                             \
+    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
+
+#define DeclareCustomFetchBicubicSamplesMin16(Name, LoadTexture)                                        \
+    DeclareCustomFetchBicubicSamplesWithType(FetchedBicubicSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
+
+#define DeclareCustomFetchBilinearSamplesWithType(SampleType, TextureType,AddrType, Name, LoadTexture)  \
+    SampleType Name(AddrType iPxSample, AddrType iTextureSize)                                          \
+    {   /* Loads the 2x2 block at offsets (0, 0)..(+1, +1); fColorXY holds the texel at offset (X, Y). */ \
+        SampleType Samples;                                                                             \
+        Samples.fColor00 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +0), iTextureSize)));           \
+        Samples.fColor10 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +0), iTextureSize)));           \
+        Samples.fColor01 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+0, +1), iTextureSize)));           \
+        Samples.fColor11 = TextureType(LoadTexture(ClampCoord(iPxSample, AddrType(+1, +1), iTextureSize)));           \
+        return Samples;                                                                                 \
+    }
+
+#define DeclareCustomFetchBilinearSamples(Name, LoadTexture)                                             \
+    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamples, FfxFloat32x4, FfxInt32x2, Name, LoadTexture)
+
+#define DeclareCustomFetchBilinearSamplesMin16(Name, LoadTexture)                                        \
+    DeclareCustomFetchBilinearSamplesWithType(FetchedBilinearSamplesMin16, FFX_MIN16_F4, FfxInt32x2, Name, LoadTexture)
+
+// BE CAREFUL: there are some precision issues; e.g. (3253, 125) turning into (3252.9989778, 125.001102)
+// is common, so iPxSample can "jitter".
+#define DeclareCustomTextureSample(Name, InterpolateSamples, FetchSamples)                                           \
+    FfxFloat32x4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                               \
+    {   /* Converts the UV to texel space, fetches the neighbourhood via FetchSamples and filters it. */            \
+        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f);                \
+        /* Clamp base coords */                                                                                      \
+        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x));                                 \
+        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y));                                 \
+        /* floor() yields iTextureSize when fPxSample sits on the upper clamp; ClampCoord expects an in-range base, so cap at size - 1 */ \
+        FfxInt32x2 iPxSample = FfxInt32x2(ffxMin(FfxInt32(floor(fPxSample.x)), iTextureSize.x - 1), ffxMin(FfxInt32(floor(fPxSample.y)), iTextureSize.y - 1)); \
+        FfxFloat32x2 fPxFrac = ffxFract(fPxSample);                                                                  \
+        FfxFloat32x4 fColorXY = FfxFloat32x4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));    \
+        return fColorXY;                                                                                             \
+    }
+
+#define DeclareCustomTextureSampleMin16(Name, InterpolateSamples, FetchSamples)                                      \
+    FFX_MIN16_F4 Name(FfxFloat32x2 fUvSample, FfxInt32x2 iTextureSize)                                               \
+    {   /* Converts the UV to texel space, fetches the neighbourhood via FetchSamples and filters it in FFX_MIN16_F. */ \
+        FfxFloat32x2 fPxSample = (fUvSample * FfxFloat32x2(iTextureSize)) - FfxFloat32x2(0.5f, 0.5f);                \
+        /* Clamp base coords */                                                                                      \
+        fPxSample.x = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.x), fPxSample.x));                                 \
+        fPxSample.y = ffxMax(0.0f, ffxMin(FfxFloat32(iTextureSize.y), fPxSample.y));                                 \
+        /* floor() yields iTextureSize when fPxSample sits on the upper clamp; ClampCoord expects an in-range base, so cap at size - 1 */ \
+        FfxInt32x2 iPxSample = FfxInt32x2(ffxMin(FfxInt32(floor(fPxSample.x)), iTextureSize.x - 1), ffxMin(FfxInt32(floor(fPxSample.y)), iTextureSize.y - 1)); \
+        FFX_MIN16_F2 fPxFrac = FFX_MIN16_F2(ffxFract(fPxSample));                                                    \
+        FFX_MIN16_F4 fColorXY = FFX_MIN16_F4(InterpolateSamples(FetchSamples(iPxSample, iTextureSize), fPxFrac));    \
+        return fColorXY;                                                                                             \
+    }
+
+#define FFX_FSR2_CONCAT_ID(x, y) x ## y
+#define FFX_FSR2_CONCAT(x, y) FFX_FSR2_CONCAT_ID(x, y)
+#define FFX_FSR2_SAMPLER_1D_0 Lanczos2
+#define FFX_FSR2_SAMPLER_1D_1 Lanczos2LUT
+#define FFX_FSR2_SAMPLER_1D_2 Lanczos2Approx
+
+#define FFX_FSR2_GET_LANCZOS_SAMPLER1D(x) FFX_FSR2_CONCAT(FFX_FSR2_SAMPLER_1D_, x)
+
+#endif //!defined( FFX_FSR2_SAMPLE_H )
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
new file mode 100644
index 0000000000..101b75d25e
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen.h
@@ -0,0 +1,250 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#define USE_YCOCG 1
+
+#define fAutogenEpsilon 0.01f
+
+// EXPERIMENTAL
+
+FFX_MIN16_F ComputeAutoTC_01(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
+{   // Returns 0 or 1: 1 when the opaque-to-final colour difference changed by at least getTcThreshold() versus the previous frame.
+    FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
+    FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
+    FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
+    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);
+
+#if USE_YCOCG    
+    colorPreAlpha = RGBToYCoCg(colorPreAlpha);
+    colorPostAlpha = RGBToYCoCg(colorPostAlpha);
+    colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
+    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
+#endif
+
+    FfxFloat32x3 colorDeltaCurr = colorPostAlpha - colorPreAlpha;
+    FfxFloat32x3 colorDeltaPrev = colorPrevPostAlpha - colorPrevPreAlpha;
+    bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDeltaCurr), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); // NOTE(review): hasAlpha is not read in this function
+    bool hadAlpha = any(FFX_GREATER_THAN(abs(colorDeltaPrev), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); // NOTE(review): hadAlpha is not read in this function
+
+    FfxFloat32x3 X = colorPreAlpha;
+    FfxFloat32x3 Y = colorPostAlpha;
+    FfxFloat32x3 Z = colorPrevPreAlpha;
+    FfxFloat32x3 W = colorPrevPostAlpha;
+
+    FFX_MIN16_F retVal = FFX_MIN16_F(ffxSaturate(dot(abs(abs(Y - X) - abs(W - Z)), FfxFloat32x3(1, 1, 1)))); // saturated channel sum of | |Y - X| - |W - Z| |
+
+    // cleanup very small values
+    retVal = (retVal < getTcThreshold()) ? FFX_MIN16_F(0.0f) : FFX_MIN16_F(1.f); // binarise against getTcThreshold()
+
+    return retVal;
+}
+
+// works ok: thin edges
+FFX_MIN16_F ComputeAutoTC_02(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
+{   // Returns a value in [0, 1] built from the ratio of final-colour change to opaque-colour change, scaled by the final-colour change.
+    FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId);
+    FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
+    FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
+    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);
+
+#if USE_YCOCG    
+    colorPreAlpha = RGBToYCoCg(colorPreAlpha);
+    colorPostAlpha = RGBToYCoCg(colorPostAlpha);
+    colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
+    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
+#endif
+
+    FfxFloat32x3 colorDelta = colorPostAlpha - colorPreAlpha;
+    FfxFloat32x3 colorPrevDelta = colorPrevPostAlpha - colorPrevPreAlpha;
+    bool hasAlpha = any(FFX_GREATER_THAN(abs(colorDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); // current frame: final differs from opaque-only
+    bool hadAlpha = any(FFX_GREATER_THAN(abs(colorPrevDelta), FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon))); // previous frame: same check
+
+    FfxFloat32x3 delta = colorPostAlpha - colorPreAlpha;              //prev+1*d = post   => d = color, alpha =   NOTE(review): delta is not read below
+    FfxFloat32x3 deltaPrev = colorPrevPostAlpha - colorPrevPreAlpha; // NOTE(review): deltaPrev is not read below
+
+    FfxFloat32x3 X = colorPrevPreAlpha; // NOTE(review): X is not read below
+    FfxFloat32x3 N = colorPreAlpha - colorPrevPreAlpha;
+    FfxFloat32x3 YAminusXA = colorPrevPostAlpha - colorPrevPreAlpha; // NOTE(review): YAminusXA is not read below
+    FfxFloat32x3 NminusNA = colorPostAlpha - colorPrevPostAlpha;
+
+    FfxFloat32x3 A = (hasAlpha || hadAlpha) ? NminusNA / max(FfxFloat32x3(fAutogenEpsilon, fAutogenEpsilon, fAutogenEpsilon), N) : FfxFloat32x3(0, 0, 0); // divisor floored at fAutogenEpsilon per channel
+
+    FFX_MIN16_F retVal = FFX_MIN16_F( max(max(A.x, A.y), A.z) );
+
+    // only pixels that have significantly changed in color should be considered
+    retVal = ffxSaturate(retVal * FFX_MIN16_F(length(colorPostAlpha - colorPrevPostAlpha)) );
+
+    return retVal;
+}
+
+// This function computes the TransparencyAndComposition mask:
+// This mask indicates pixels that should discard locks and apply color clamping.
+// 
+// Typically this is the case for translucent pixels (that don't write depth values) or pixels where the correctness of 
+// the MVs cannot be guaranteed (e.g. procedural movement or vegetation that does not have MVs to reduce the cost during rasterization)
+// Also, large changes in color due to changed lighting should be marked to remove locks on pixels with "old" lighting.
+//
+// This function takes an opaque-only texture and a final texture and uses internal copies of those textures from the last frame.
+// The function tries to determine where the color changes between opaque only and final image to determine the pixels that use transparency.
+// Also it uses the previous frames and detects where the use of transparency changed to mark those pixels.
+// Additionally it marks pixels where the color changed significantly in the opaque only image, e.g. due to lighting or texture animation.
+// 
+// In the final step it stores the current textures in internal textures for the next frame
+
+FFX_MIN16_F ComputeTransparencyAndComposition(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
+{   // Evaluates ComputeAutoTC_02 first; only when that exceeds 0.01 is the result replaced by ComputeAutoTC_01.
+    FFX_MIN16_F retVal = ComputeAutoTC_02(uDispatchThreadId, iPrevIdx);
+
+    // [branch]
+    if (retVal > FFX_MIN16_F(0.01f))
+    {
+        retVal = ComputeAutoTC_01(uDispatchThreadId, iPrevIdx);
+    }
+    return retVal;
+}
+
+float computeSolidEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
+{   // Edge measure over the 3x3 neighbourhood of length(current opaque-only colour - previous pre-alpha colour).
+    float lum[9]; // filled with x fastest: index = (y + 1) * 3 + (x + 1), so lum[4] is the centre
+    int i = 0;
+    for (int y = -1; y < 2; ++y)
+    {
+        for (int x = -1; x < 2; ++x)
+        {
+            FfxFloat32x3 curCol  = LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb;
+            FfxFloat32x3 prevCol = LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb;
+            lum[i++] = length(curCol - prevCol);
+        }
+    }
+
+    //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]);
+    //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]);
+
+    //return sqrt(gradX * gradX + gradY * gradY);
+
+    float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); // x = -1 and x = +1 neighbours vs. centre
+    float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); // y = -1 and y = +1 neighbours vs. centre
+
+    return sqrt(sqrt(gradX * gradY)); // fourth root of the product of the four differences
+}
+
+float computeAlphaEdge(FFX_MIN16_I2 curPos, FFX_MIN16_I2 prevPos)
+{   // Edge measure over the 3x3 neighbourhood of the frame-to-frame change in |final colour - opaque-only colour|.
+    float lum[9]; // filled with x fastest: index = (y + 1) * 3 + (x + 1), so lum[4] is the centre
+    int i = 0;
+    for (int y = -1; y < 2; ++y)
+    {
+        for (int x = -1; x < 2; ++x)
+        {
+            FfxFloat32x3 curCol  = abs(LoadInputColor(curPos + FFX_MIN16_I2(x, y)).rgb - LoadOpaqueOnly(curPos + FFX_MIN16_I2(x, y)).rgb);
+            FfxFloat32x3 prevCol = abs(LoadPrevPostAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb - LoadPrevPreAlpha(prevPos + FFX_MIN16_I2(x, y)).rgb);
+            lum[i++] = length(curCol - prevCol);
+        }
+    }
+
+    //float gradX = abs(lum[3] - lum[4]) + abs(lum[5] - lum[4]);
+    //float gradY = abs(lum[1] - lum[4]) + abs(lum[7] - lum[4]);
+
+    //return sqrt(gradX * gradX + gradY * gradY);
+
+    float gradX = abs(lum[3] - lum[4]) * abs(lum[5] - lum[4]); // x = -1 and x = +1 neighbours vs. centre
+    float gradY = abs(lum[1] - lum[4]) * abs(lum[7] - lum[4]); // y = -1 and y = +1 neighbours vs. centre
+
+    return sqrt(sqrt(gradX * gradY)); // fourth root of the product of the four differences
+}
+
+FFX_MIN16_F ComputeAabbOverlap(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
+{   // Returns the fraction (0..1) of current 3x3 input-colour channel values lying outside the previous frame's 3x3 post-alpha min/max box.
+    FFX_MIN16_F retVal = FFX_MIN16_F(0.f);
+
+    FfxFloat32x2 fMotionVector = LoadInputMotionVector(uDispatchThreadId); // NOTE(review): not read below
+    FfxFloat32x3 colorPreAlpha = LoadOpaqueOnly(uDispatchThreadId); // NOTE(review): the four colours loaded here are not read after the YCoCg conversion
+    FfxFloat32x3 colorPostAlpha = LoadInputColor(uDispatchThreadId);
+    FfxFloat32x3 colorPrevPreAlpha = LoadPrevPreAlpha(iPrevIdx);
+    FfxFloat32x3 colorPrevPostAlpha = LoadPrevPostAlpha(iPrevIdx);
+
+#if USE_YCOCG    
+    colorPreAlpha = RGBToYCoCg(colorPreAlpha);
+    colorPostAlpha = RGBToYCoCg(colorPostAlpha);
+    colorPrevPreAlpha = RGBToYCoCg(colorPrevPreAlpha);
+    colorPrevPostAlpha = RGBToYCoCg(colorPrevPostAlpha);
+#endif
+    FfxFloat32x3 minPrev = FfxFloat32x3(+1000.f, +1000.f, +1000.f); // constructor now matches the declared type
+    FfxFloat32x3 maxPrev = FfxFloat32x3(-1000.f, -1000.f, -1000.f); // constructor now matches the declared type
+    for (int y = -1; y < 2; ++y)
+    {
+        for (int x = -1; x < 2; ++x)
+        {
+            FfxFloat32x3 W = LoadPrevPostAlpha(iPrevIdx + FFX_MIN16_I2(x, y));
+
+#if USE_YCOCG
+            W = RGBToYCoCg(W);
+#endif
+            minPrev = min(minPrev, W);
+            maxPrev = max(maxPrev, W);
+        }
+    }
+    // instead of computing the overlap: simply count how many samples are outside
+    // set reactive based on that
+    FFX_MIN16_F count = FFX_MIN16_F(0.f);
+    for (int y = -1; y < 2; ++y)
+    {
+        for (int x = -1; x < 2; ++x)
+        {
+            FfxFloat32x3 Y = LoadInputColor(uDispatchThreadId + FFX_MIN16_I2(x, y));
+
+#if USE_YCOCG
+            Y = RGBToYCoCg(Y);
+#endif
+            count += ((Y.x < minPrev.x) || (Y.x > maxPrev.x)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
+            count += ((Y.y < minPrev.y) || (Y.y > maxPrev.y)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
+            count += ((Y.z < minPrev.z) || (Y.z > maxPrev.z)) ? FFX_MIN16_F(1.f) : FFX_MIN16_F(0.f);
+        }
+    }
+    retVal = count / FFX_MIN16_F(27.f); // 9 samples x 3 channels
+
+    return retVal;
+}
+
+
+// This function computes the Reactive mask:
+// We want pixels marked where the alpha portion of the frame changes a lot between neighbours
+// Those pixels are expected to change quickly between frames, too. (e.g. small particles, reflections on curved surfaces...)
+// As a result history would not be trustworthy.
+// On the other hand we don't want pixels marked where pre-alpha has a large difference, since those would profit from accumulation
+// For mirrors we may assume the pre-alpha is pretty uniform color.
+// 
+// This works well generally, but also marks edge pixels
+FFX_MIN16_F ComputeReactive(FFX_MIN16_I2 uDispatchThreadId, FFX_MIN16_I2 iPrevIdx)
+{   // Returns saturate(alpha edge measure - opaque edge measure) for this pixel.
+    // we only get here if alpha has a significant contribution and has changed since last frame.
+    FFX_MIN16_F retVal = FFX_MIN16_F(0.f);
+
+    // mark pixels with huge variance in alpha as reactive
+    FFX_MIN16_F alphaEdge = FFX_MIN16_F(computeAlphaEdge(uDispatchThreadId, iPrevIdx));
+    FFX_MIN16_F opaqueEdge = FFX_MIN16_F(computeSolidEdge(uDispatchThreadId, iPrevIdx));
+    retVal = ffxSaturate(alphaEdge - opaqueEdge);
+
+    // the above also marks edge pixels due to jitter, so we need to cancel those out
+    // NOTE(review): no such cancellation follows in this function; retVal is returned unchanged.
+
+    return retVal;
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
new file mode 100644
index 0000000000..12b4b40e08
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_tcr_autogen_pass.glsl
@@ -0,0 +1,122 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+
+
+#extension GL_GOOGLE_include_directive : require
+#extension GL_EXT_samplerless_texture_functions : require
+
+#define FSR2_BIND_SRV_INPUT_OPAQUE_ONLY                     0 // binding indices for this pass; presumably consumed by ffx_fsr2_callbacks_glsl.h included below - TODO confirm
+#define FSR2_BIND_SRV_INPUT_COLOR                           1
+#define FSR2_BIND_SRV_INPUT_MOTION_VECTORS                  2
+#define FSR2_BIND_SRV_PREV_PRE_ALPHA_COLOR                  3
+#define FSR2_BIND_SRV_PREV_POST_ALPHA_COLOR                 4
+#define FSR2_BIND_SRV_REACTIVE_MASK                         5
+#define FSR2_BIND_SRV_TRANSPARENCY_AND_COMPOSITION_MASK     6
+
+#define FSR2_BIND_UAV_AUTOREACTIVE                          7
+#define FSR2_BIND_UAV_AUTOCOMPOSITION                       8
+#define FSR2_BIND_UAV_PREV_PRE_ALPHA_COLOR                  9
+#define FSR2_BIND_UAV_PREV_POST_ALPHA_COLOR                 10
+
+#define FSR2_BIND_CB_FSR2									11
+#define FSR2_BIND_CB_REACTIVE                               12 // binding used by the cbGenerateReactive uniform block declared below
+
+// -- GODOT start --
+#if FFX_FSR2_OPTION_GODOT_DERIVE_INVALID_MOTION_VECTORS
+#define FSR2_BIND_SRV_INPUT_DEPTH                           13 // extra depth input binding, only defined when the Godot-specific option is enabled
+#endif
+// -- GODOT end --
+
+#include "ffx_fsr2_callbacks_glsl.h"
+#include "ffx_fsr2_common.h"
+
+#ifdef FSR2_BIND_CB_REACTIVE
+layout (set = 1, binding = FSR2_BIND_CB_REACTIVE, std140) uniform cbGenerateReactive_t
+{
+        float   fTcThreshold; // 0.1 is a good starting value, lower will result in more TC pixels
+        float   fTcScale;     // multiplier applied to outReactiveMask.y (transparency-and-composition value) in main()
+        float   fReactiveScale; // multiplier applied to outReactiveMask.x (computed reactive value) in main()
+        float   fReactiveMax; // upper bound applied to the scaled reactive value in main()
+} cbGenerateReactive;
+
+float getTcThreshold() // threshold accessor backed by the constant buffer
+{
+    return cbGenerateReactive.fTcThreshold;
+}
+
+#else
+ float getTcThreshold() // NOTE(review): main() reads cbGenerateReactive directly, so this fallback alone does not let the pass build without the buffer
+ {
+    return 0.05f; // fixed threshold used when FSR2_BIND_CB_REACTIVE is not defined
+ }
+#endif
+
+#include "ffx_fsr2_tcr_autogen.h"
+
+#ifndef FFX_FSR2_THREAD_GROUP_WIDTH
+#define FFX_FSR2_THREAD_GROUP_WIDTH 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_WIDTH (defaults below give an 8x8x1 workgroup unless overridden before this point)
+#ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#define FFX_FSR2_THREAD_GROUP_HEIGHT 8
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_HEIGHT
+#ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#define FFX_FSR2_THREAD_GROUP_DEPTH 1
+#endif // #ifndef FFX_FSR2_THREAD_GROUP_DEPTH
+#ifndef FFX_FSR2_NUM_THREADS
+#define FFX_FSR2_NUM_THREADS layout (local_size_x = FFX_FSR2_THREAD_GROUP_WIDTH, local_size_y = FFX_FSR2_THREAD_GROUP_HEIGHT, local_size_z = FFX_FSR2_THREAD_GROUP_DEPTH) in;
+#endif // #ifndef FFX_FSR2_NUM_THREADS
+
+FFX_FSR2_NUM_THREADS
+void main() // Entry point: one invocation per pixel; writes the auto-generated reactive / T&C mask pair and stores the current pre/post-alpha colors
+{
+    FFX_MIN16_I2 uDispatchThreadId = FFX_MIN16_I2(gl_GlobalInvocationID.xy);
+
+    // ToDo: take into account jitter (i.e. add delta of previous jitter and current jitter to previous UV)
+    // fetch pre- and post-alpha color values
+    FFX_MIN16_F2 fUv = ( FFX_MIN16_F2(uDispatchThreadId) + FFX_MIN16_F2(0.5f, 0.5f) ) / FFX_MIN16_F2( RenderSize() ); // pixel-center UV
+    FFX_MIN16_F2 fPrevUV = fUv + FFX_MIN16_F2( LoadInputMotionVector(uDispatchThreadId) ); // offset by the motion vector (assumes it is expressed in UV units - TODO confirm)
+    FFX_MIN16_I2 iPrevIdx = FFX_MIN16_I2(fPrevUV * FFX_MIN16_F2(RenderSize()) - 0.5f); // back from UV to an integer pixel index
+
+    FFX_MIN16_F3 colorPreAlpha  = FFX_MIN16_F3( LoadOpaqueOnly( uDispatchThreadId ) );
+    FFX_MIN16_F3 colorPostAlpha = FFX_MIN16_F3( LoadInputColor( uDispatchThreadId ) );
+
+    FFX_MIN16_F2 outReactiveMask = FFX_MIN16_F2( 0.f, 0.f ); // x = reactive value, y = transparency-and-composition value
+    
+    outReactiveMask.y = ComputeTransparencyAndComposition(uDispatchThreadId, iPrevIdx);
+
+    if (outReactiveMask.y > 0.5f) // the reactive value is only computed where the T&C value exceeds 0.5; otherwise x stays 0
+    {
+        outReactiveMask.x = ComputeReactive(uDispatchThreadId, iPrevIdx);
+        outReactiveMask.x *= FFX_MIN16_F(cbGenerateReactive.fReactiveScale);
+        outReactiveMask.x = outReactiveMask.x < cbGenerateReactive.fReactiveMax ? outReactiveMask.x : FFX_MIN16_F( cbGenerateReactive.fReactiveMax ); // cap at fReactiveMax
+    }
+
+    outReactiveMask.y *= FFX_MIN16_F(cbGenerateReactive.fTcScale);
+
+    outReactiveMask.x = ffxMax(outReactiveMask.x, FFX_MIN16_F(LoadReactiveMask(uDispatchThreadId))); // never go below the value already present in the loaded reactive mask
+    outReactiveMask.y = ffxMax(outReactiveMask.y, FFX_MIN16_F(LoadTransparencyAndCompositionMask(uDispatchThreadId))); // same for the loaded T&C mask
+
+    StoreAutoReactive(uDispatchThreadId, outReactiveMask);
+
+    StorePrevPreAlpha(uDispatchThreadId, colorPreAlpha); // presumably read back as the "previous" colors on the next frame - TODO confirm
+    StorePrevPostAlpha(uDispatchThreadId, colorPostAlpha);
+}
diff --git a/thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h b/thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h
new file mode 100644
index 0000000000..abdb8888a9
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_fsr2_upsample.h
@@ -0,0 +1,194 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifndef FFX_FSR2_UPSAMPLE_H
+#define FFX_FSR2_UPSAMPLE_H
+
+FFX_STATIC const FfxUInt32 iLanczos2SampleCount = 16; // size of fSamples[] in ComputeUpsampledColorAndWeight, which is indexed as col + (row << 2)
+
+void Deringing(RectificationBox clippingBox, FFX_PARAMETER_INOUT FfxFloat32x3 fColor) // clamps fColor in place to the box's [aabbMin, aabbMax] range
+{
+    fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
+}
+#if FFX_HALF
+void Deringing(RectificationBoxMin16 clippingBox, FFX_PARAMETER_INOUT FFX_MIN16_F3 fColor) // min16 overload of the clamp above, only compiled when FFX_HALF is set
+{
+    fColor = clamp(fColor, clippingBox.aabbMin, clippingBox.aabbMax);
+}
+#endif
+
+#ifndef FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE
+#define FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE 2 // Approximate (default when the option is not set by the build)
+#endif
+
+FfxFloat32 GetUpsampleLanczosWeight(FfxFloat32x2 fSrcSampleOffset, FfxFloat32 fKernelWeight) // returns the filter weight for a sample at fSrcSampleOffset, using the variant selected at compile time
+{
+    FfxFloat32x2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx; // scale the offset by the kernel weight before evaluating the filter
+#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
+    FfxFloat32 fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased));
+#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
+    FfxFloat32 fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased));
+#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
+    FfxFloat32 fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased)); // takes the squared length, so no length() call is needed on this path
+#else
+#error "Invalid Lanczos type"
+#endif
+    return fSampleWeight;
+}
+
+#if FFX_HALF
+FFX_MIN16_F GetUpsampleLanczosWeight(FFX_MIN16_F2 fSrcSampleOffset, FFX_MIN16_F fKernelWeight) // min16 overload; same variant selection as the FfxFloat32 version
+{
+    FFX_MIN16_F2 fSrcSampleOffsetBiased = fSrcSampleOffset * fKernelWeight.xx;
+#if FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 0 // LANCZOS_TYPE_REFERENCE
+    FFX_MIN16_F fSampleWeight = Lanczos2(length(fSrcSampleOffsetBiased));
+#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 1 // LANCZOS_TYPE_LUT
+    FFX_MIN16_F fSampleWeight = Lanczos2_UseLUT(length(fSrcSampleOffsetBiased));
+#elif FFX_FSR2_OPTION_UPSAMPLE_USE_LANCZOS_TYPE == 2 // LANCZOS_TYPE_APPROXIMATE
+    FFX_MIN16_F fSampleWeight = Lanczos2ApproxSq(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
+
+    // To Test: Save reciprocal sqrt compute
+    // FfxFloat32 fSampleWeight = Lanczos2Sq_UseLUT(dot(fSrcSampleOffsetBiased, fSrcSampleOffsetBiased));
+#else
+#error "Invalid Lanczos type"
+#endif
+    return fSampleWeight;
+}
+#endif
+
+FfxFloat32 ComputeMaxKernelWeight() { // returns min(1.99, 1 + (1 / DownscaleFactor().x - 1) * fKernelSizeBias)
+    const FfxFloat32 fKernelSizeBias = 1.0f; // with a bias of 1.0 the expression below reduces to 1 / DownscaleFactor().x
+
+    FfxFloat32 fKernelWeight = FfxFloat32(1) + (FfxFloat32(1.0f) / FfxFloat32x2(DownscaleFactor()) - FfxFloat32(1)).x * FfxFloat32(fKernelSizeBias);
+
+    return ffxMin(FfxFloat32(1.99f), fKernelWeight); // cap the weight just below 2
+}
+
+FfxFloat32x4 ComputeUpsampledColorAndWeight(const AccumulationPassCommonParams params,
+    FFX_PARAMETER_INOUT RectificationBox clippingBox, FfxFloat32 fReactiveFactor)
+{ // Returns xyz = filtered color, w = accumulated sample weight; also fills clippingBox from the same 3x3 samples
+    #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
+    #include "ffx_fsr2_force16_begin.h"
+    #endif
+    // We compute a sliced lanczos filter with 2 lobes (other slices are accumulated temporally)
+    FfxFloat32x2 fDstOutputPos = FfxFloat32x2(params.iPxHrPos) + FFX_BROADCAST_FLOAT32X2(0.5f);      // Destination resolution output pixel center position
+    FfxFloat32x2 fSrcOutputPos = fDstOutputPos * DownscaleFactor();                   // Source resolution output pixel center position
+    FfxInt32x2 iSrcInputPos = FfxInt32x2(floor(fSrcOutputPos));                     // TODO: what about weird upscale factors...
+
+    #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
+    #include "ffx_fsr2_force16_end.h"
+    #endif
+
+    FfxFloat32x3 fSamples[iLanczos2SampleCount]; // only the 3x3 entries at col + (row << 2) are written and read below
+
+    FfxFloat32x2 fSrcUnjitteredPos = (FfxFloat32x2(iSrcInputPos) + FfxFloat32x2(0.5f, 0.5f)) - Jitter(); // This is the un-jittered position of the sample at offset 0,0
+
+    FfxInt32x2 offsetTL;
+    offsetTL.x = (fSrcUnjitteredPos.x > fSrcOutputPos.x) ? FfxInt32(-2) : FfxInt32(-1);
+    offsetTL.y = (fSrcUnjitteredPos.y > fSrcOutputPos.y) ? FfxInt32(-2) : FfxInt32(-1);
+
+    //Load samples
+    // If fSrcUnjitteredPos.y > fSrcOutputPos.y, indicates offsetTL.y = -2, sample offset Y will be [-2, 1], clipbox will be rows [1, 3].
+    // Flip row# for sampling offset in this case, so first 0~2 rows in the sampled array can always be used for computing the clipbox.
+    // This reduces branch or cmove on sampled colors, but moving this overhead to sample position / weight calculation time which apply to less values.
+    const FfxBoolean bFlipRow = fSrcUnjitteredPos.y > fSrcOutputPos.y;
+    const FfxBoolean bFlipCol = fSrcUnjitteredPos.x > fSrcOutputPos.x;
+
+    FfxFloat32x2 fOffsetTL = FfxFloat32x2(offsetTL);
+
+    FFX_UNROLL
+    for (FfxInt32 row = 0; row < 3; row++) {
+
+        FFX_UNROLL
+            for (FfxInt32 col = 0; col < 3; col++) {
+                FfxInt32 iSampleIndex = col + (row << 2); // row stride of 4 inside fSamples[]
+
+                FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row); // mirrored within the 0..3 range when flipped
+                FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + offsetTL + sampleColRow;
+
+                const FfxInt32x2 sampleCoord = ClampLoad(iSrcSamplePos, FfxInt32x2(0, 0), FfxInt32x2(RenderSize()));
+
+                fSamples[iSampleIndex] = LoadPreparedInputColor(FfxInt32x2(sampleCoord));
+            }
+    }
+
+    FfxFloat32x4 fColorAndWeight = FfxFloat32x4(0.0f, 0.0f, 0.0f, 0.0f);
+
+    FfxFloat32x2 fBaseSampleOffset = FfxFloat32x2(fSrcUnjitteredPos - fSrcOutputPos);
+
+    // Identify how much of each upsampled color to be used for this frame
+    const FfxFloat32 fKernelReactiveFactor = ffxMax(fReactiveFactor, FfxFloat32(params.bIsNewSample)); // evaluates to at least 1.0 whenever bIsNewSample is set
+    const FfxFloat32 fKernelBiasMax = ComputeMaxKernelWeight() * (1.0f - fKernelReactiveFactor);
+
+    const FfxFloat32 fKernelBiasMin = ffxMax(1.0f, ((1.0f + fKernelBiasMax) * 0.3f));
+    const FfxFloat32 fKernelBiasFactor = ffxMax(0.0f, ffxMax(0.25f * params.fDepthClipFactor, fKernelReactiveFactor));
+    const FfxFloat32 fKernelBias = ffxLerp(fKernelBiasMax, fKernelBiasMin, fKernelBiasFactor); // passed as fKernelWeight to GetUpsampleLanczosWeight below
+
+    const FfxFloat32 fRectificationCurveBias = ffxLerp(-2.0f, -3.0f, ffxSaturate(params.fHrVelocity / 50.0f)); // exponent scale for the box weights; moves from -2 towards -3 as fHrVelocity approaches 50
+
+    FFX_UNROLL
+    for (FfxInt32 row = 0; row < 3; row++) {
+        FFX_UNROLL
+        for (FfxInt32 col = 0; col < 3; col++) {
+            FfxInt32 iSampleIndex = col + (row << 2); // same indexing as the load loop above
+
+            const FfxInt32x2 sampleColRow = FfxInt32x2(bFlipCol ? (3 - col) : col, bFlipRow ? (3 - row) : row);
+            const FfxFloat32x2 fOffset = fOffsetTL + FfxFloat32x2(sampleColRow);
+            FfxFloat32x2 fSrcSampleOffset = fBaseSampleOffset + fOffset;
+
+            FfxInt32x2 iSrcSamplePos = FfxInt32x2(iSrcInputPos) + FfxInt32x2(offsetTL) + sampleColRow;
+
+            const FfxFloat32 fOnScreenFactor = FfxFloat32(IsOnScreen(FfxInt32x2(iSrcSamplePos), FfxInt32x2(RenderSize()))); // multiplies the weight by the IsOnScreen result, so samples it rejects contribute nothing
+            FfxFloat32 fSampleWeight = fOnScreenFactor * FfxFloat32(GetUpsampleLanczosWeight(fSrcSampleOffset, fKernelBias));
+
+            fColorAndWeight += FfxFloat32x4(fSamples[iSampleIndex] * fSampleWeight, fSampleWeight);
+
+            // Update rectification box
+            {
+                const FfxFloat32 fSrcSampleOffsetSq = dot(fSrcSampleOffset, fSrcSampleOffset);
+                const FfxFloat32 fBoxSampleWeight = exp(fRectificationCurveBias * fSrcSampleOffsetSq); // weight decays exponentially with squared distance
+
+                const FfxBoolean bInitialSample = (row == 0) && (col == 0);
+                RectificationBoxAddSample(bInitialSample, clippingBox, fSamples[iSampleIndex], fBoxSampleWeight);
+            }
+        }
+    }
+
+    RectificationBoxComputeVarianceBoxData(clippingBox);
+
+    fColorAndWeight.w *= FfxFloat32(fColorAndWeight.w > FSR2_EPSILON); // zero the weight when it does not exceed FSR2_EPSILON
+
+    if (fColorAndWeight.w > FSR2_EPSILON) {
+        // Normalize for deringing (we need to compare colors)
+        fColorAndWeight.xyz = fColorAndWeight.xyz / fColorAndWeight.w;
+        fColorAndWeight.w *= fUpsampleLanczosWeightScale;
+
+        Deringing(clippingBox, fColorAndWeight.xyz);
+    }
+
+    #if FFX_FSR2_OPTION_UPSAMPLE_SAMPLERS_USE_DATA_HALF && FFX_HALF
+    #include "ffx_fsr2_force16_end.h" // NOTE(review): force16_end was already included above after the position setup; confirm this second include is intended
+    #endif
+
+    return fColorAndWeight;
+}
+
+#endif //!defined( FFX_FSR2_UPSAMPLE_H )
diff --git a/thirdparty/amd-fsr2/shaders/ffx_spd.h b/thirdparty/amd-fsr2/shaders/ffx_spd.h
new file mode 100644
index 0000000000..5ce24ec87c
--- /dev/null
+++ b/thirdparty/amd-fsr2/shaders/ffx_spd.h
@@ -0,0 +1,936 @@
+// This file is part of the FidelityFX SDK.
+//
+// Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+#ifdef FFX_CPU
+FFX_STATIC void SpdSetup(FfxUInt32x2    dispatchThreadGroupCountXY,  // CPU side: dispatch thread group count xy
+                         FfxUInt32x2    workGroupOffset,             // GPU side: pass in as constant
+                         FfxUInt32x2    numWorkGroupsAndMips,        // GPU side: pass in as constant
+                         FfxUInt32x4     rectInfo,                    // left, top, width, height
+                         FfxInt32 mips)                        // optional: if -1, calculate based on rect width and height
+{ // NOTE(review): the first three parameters are written as outputs; assumes the CPU-side vector types are arrays so the writes reach the caller - TODO confirm
+    workGroupOffset[0] = rectInfo[0] / 64;  // rectInfo[0] = left
+    workGroupOffset[1] = rectInfo[1] / 64;  // rectInfo[1] = top
+
+    FfxUInt32 endIndexX = (rectInfo[0] + rectInfo[2] - 1) / 64;  // rectInfo[0] = left, rectInfo[2] = width
+    FfxUInt32 endIndexY = (rectInfo[1] + rectInfo[3] - 1) / 64;  // rectInfo[1] = top, rectInfo[3] = height
+
+    dispatchThreadGroupCountXY[0] = endIndexX + 1 - workGroupOffset[0]; // number of 64-wide tiles touched by the rect in X
+    dispatchThreadGroupCountXY[1] = endIndexY + 1 - workGroupOffset[1]; // number of 64-high tiles touched by the rect in Y
+
+    numWorkGroupsAndMips[0] = (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]); // total workgroup count
+
+    if (mips >= 0)
+    {
+        numWorkGroupsAndMips[1] = FfxUInt32(mips); // caller-specified mip count
+    }
+    else
+    {  
+        // calculate based on rect width and height
+        FfxUInt32 resolution    = ffxMax(rectInfo[2], rectInfo[3]);
+        numWorkGroupsAndMips[1] = FfxUInt32((ffxMin(floor(log2(FfxFloat32(resolution))), FfxFloat32(12)))); // floor(log2(longest side)), capped at 12
+    }
+}
+
+FFX_STATIC void SpdSetup(FfxUInt32x2 dispatchThreadGroupCountXY,  // CPU side: dispatch thread group count xy
+                         FfxUInt32x2 workGroupOffset,             // GPU side: pass in as constant
+                         FfxUInt32x2 numWorkGroupsAndMips,        // GPU side: pass in as constant
+                         FfxUInt32x4  rectInfo)                    // left, top, width, height
+{
+    SpdSetup(dispatchThreadGroupCountXY, workGroupOffset, numWorkGroupsAndMips, rectInfo, -1); // -1 selects the mip count derived from the rect size
+}
+#endif // #ifdef FFX_CPU
+
+
+//==============================================================================================================================
+//                                                     NON-PACKED VERSION
+//==============================================================================================================================
+#ifdef FFX_GPU
+#ifdef SPD_PACKED_ONLY
+// Avoid compiler error: no-op stubs for the non-packed callbacks referenced by the functions below (loads return zero, stores do nothing)
+FfxFloat32x4 SpdLoadSourceImage(FfxInt32x2 p, FfxUInt32 slice)
+{
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+
+FfxFloat32x4 SpdLoad(FfxInt32x2 p, FfxUInt32 slice)
+{
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+void SpdStore(FfxInt32x2 p, FfxFloat32x4 value, FfxUInt32 mip, FfxUInt32 slice)
+{
+}
+FfxFloat32x4 SpdLoadIntermediate(FfxUInt32 x, FfxUInt32 y)
+{
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+void SpdStoreIntermediate(FfxUInt32 x, FfxUInt32 y, FfxFloat32x4 value)
+{
+}
+FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3)
+{
+    return FfxFloat32x4(0.0, 0.0, 0.0, 0.0);
+}
+#endif // #ifdef SPD_PACKED_ONLY
+
+//_____________________________________________________________/\_______________________________________________________________
+#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
+#extension GL_KHR_shader_subgroup_quad:require
+#endif
+
+void SpdWorkgroupShuffleBarrier() // workgroup barrier wrapper: barrier() for GLSL, GroupMemoryBarrierWithGroupSync() for HLSL
+{
+#ifdef FFX_GLSL
+    barrier();
+#endif
+#ifdef FFX_HLSL
+    GroupMemoryBarrierWithGroupSync();
+#endif
+}
+
+// Only last active workgroup should proceed; returns true when the calling workgroup should exit
+bool SpdExitWorkgroup(FfxUInt32 numWorkGroups, FfxUInt32 localInvocationIndex, FfxUInt32 slice)
+{
+    // global atomic counter
+    if (localInvocationIndex == 0) // a single invocation per workgroup bumps the counter
+    {
+        SpdIncreaseAtomicCounter(slice);
+    }
+
+    SpdWorkgroupShuffleBarrier(); // barrier between the increment above and the read below
+    return (SpdGetAtomicCounter() != (numWorkGroups - 1)); // presumably SpdGetAtomicCounter() yields the pre-increment value captured by invocation 0 - TODO confirm against the callback
+}
+
+// User defined: FfxFloat32x4 SpdReduce4(FfxFloat32x4 v0, FfxFloat32x4 v1, FfxFloat32x4 v2, FfxFloat32x4 v3);
+FfxFloat32x4 SpdReduceQuad(FfxFloat32x4 v) // gathers v from the other three lanes of the quad and reduces the four values with SpdReduce4
+{
+#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
+
+    FfxFloat32x4 v0 = v;
+    FfxFloat32x4 v1 = subgroupQuadSwapHorizontal(v);
+    FfxFloat32x4 v2 = subgroupQuadSwapVertical(v);
+    FfxFloat32x4 v3 = subgroupQuadSwapDiagonal(v);
+    return SpdReduce4(v0, v1, v2, v3);
+
+#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
+
+    // requires SM6.0
+    FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); // lane index rounded down to a multiple of 4
+    FfxFloat32x4     v0   = v;
+    FfxFloat32x4     v1   = WaveReadLaneAt(v, quad | 1);
+    FfxFloat32x4     v2   = WaveReadLaneAt(v, quad | 2);
+    FfxFloat32x4     v3   = WaveReadLaneAt(v, quad | 3);
+    return SpdReduce4(v0, v1, v2, v3);
+/*
+    // if SM6.0 is not available, you can use the AMD shader intrinsics
+    // the AMD shader intrinsics are available in AMD GPU Services (AGS) library:
+    // https://gpuopen.com/amd-gpu-services-ags-library/
+    // works for DX11
+    FfxFloat32x4 v0 = v;
+    FfxFloat32x4 v1;
+    v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    FfxFloat32x4 v2;
+    v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    FfxFloat32x4 v3;
+    v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    return SpdReduce4(v0, v1, v2, v3);
+    */
+#endif
+    return v; // only reached when neither wave path above is compiled in; the input is then returned unreduced
+}
+
+FfxFloat32x4 SpdReduceIntermediate(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3) // reduces four values read through SpdLoadIntermediate at the given coordinates
+{
+    FfxFloat32x4 v0 = SpdLoadIntermediate(i0.x, i0.y);
+    FfxFloat32x4 v1 = SpdLoadIntermediate(i1.x, i1.y);
+    FfxFloat32x4 v2 = SpdLoadIntermediate(i2.x, i2.y);
+    FfxFloat32x4 v3 = SpdLoadIntermediate(i3.x, i3.y);
+    return SpdReduce4(v0, v1, v2, v3);
+}
+
+FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) // reduces four values read through SpdLoad at the given coordinates of the slice
+{
+    FfxFloat32x4 v0 = SpdLoad(FfxInt32x2(i0), slice);
+    FfxFloat32x4 v1 = SpdLoad(FfxInt32x2(i1), slice);
+    FfxFloat32x4 v2 = SpdLoad(FfxInt32x2(i2), slice);
+    FfxFloat32x4 v3 = SpdLoad(FfxInt32x2(i3), slice);
+    return SpdReduce4(v0, v1, v2, v3);
+}
+
+FfxFloat32x4 SpdReduceLoad4(FfxUInt32x2 base, FfxUInt32 slice) // convenience overload: reduces the 2x2 block whose smallest coordinate is base
+{
+    return SpdReduceLoad4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice);
+}
+
+FfxFloat32x4 SpdReduceLoadSourceImage4(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice) // reduces four values read through SpdLoadSourceImage at the given coordinates
+{
+    FfxFloat32x4 v0 = SpdLoadSourceImage(FfxInt32x2(i0), slice);
+    FfxFloat32x4 v1 = SpdLoadSourceImage(FfxInt32x2(i1), slice);
+    FfxFloat32x4 v2 = SpdLoadSourceImage(FfxInt32x2(i2), slice);
+    FfxFloat32x4 v3 = SpdLoadSourceImage(FfxInt32x2(i3), slice);
+    return SpdReduce4(v0, v1, v2, v3);
+}
+
+FfxFloat32x4 SpdReduceLoadSourceImage(FfxUInt32x2 base, FfxUInt32 slice) // yields one reduced value for the 2x2 source block at base
+{
+#ifdef SPD_LINEAR_SAMPLER
+    return SpdLoadSourceImage(FfxInt32x2(base), slice); // single load; presumably the callback samples with a linear filter so the 2x2 block is already averaged - TODO confirm
+#else
+    return SpdReduceLoadSourceImage4(FfxUInt32x2(base + FfxUInt32x2(0, 0)), FfxUInt32x2(base + FfxUInt32x2(0, 1)), FfxUInt32x2(base + FfxUInt32x2(1, 0)), FfxUInt32x2(base + FfxUInt32x2(1, 1)), slice);
+#endif
+}
+
+void SpdDownsampleMips_0_1_Intrinsics(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) // writes output mips 0 and 1 for this workgroup's 64x64 source tile, using quad operations for the second reduction
+{
+    FfxFloat32x4 v[4]; // one reduced value per 32x32 quadrant of the 64x64 source tile
+
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
+    v[0]     = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[0], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[1], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[2], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[3], 0, slice);
+
+    if (mip <= 1) // nothing beyond output mip 0 was requested
+        return;
+
+    v[0] = SpdReduceQuad(v[0]);
+    v[1] = SpdReduceQuad(v[1]);
+    v[2] = SpdReduceQuad(v[2]);
+    v[3] = SpdReduceQuad(v[3]);
+
+    if ((localInvocationIndex % 4) == 0) // one invocation per quad writes the reduced values to output mip 1 and to the intermediate storage
+    {
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice);
+        SpdStoreIntermediate(x / 2, y / 2, v[0]);
+
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice);
+        SpdStoreIntermediate(x / 2 + 8, y / 2, v[1]);
+
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice);
+        SpdStoreIntermediate(x / 2, y / 2 + 8, v[2]);
+
+        SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice);
+        SpdStoreIntermediate(x / 2 + 8, y / 2 + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1_LDS(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) // same outputs as the _Intrinsics variant, but the second reduction goes through the intermediate storage and barriers instead of quad operations
+{
+    FfxFloat32x4 v[4]; // one reduced value per 32x32 quadrant of the 64x64 source tile
+
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2);
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y);
+    v[0]     = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[0], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[1], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[2], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImage(tex, slice);
+    SpdStore(pix, v[3], 0, slice);
+
+    if (mip <= 1) // nothing beyond output mip 0 was requested
+        return;
+
+    for (FfxUInt32 i = 0; i < 4; i++) // process the four quadrants one at a time, reusing the same intermediate storage
+    {
+        SpdStoreIntermediate(x, y, v[i]);
+        SpdWorkgroupShuffleBarrier(); // make every invocation's store visible before the 2x2 reads below
+        if (localInvocationIndex < 64)
+        {
+            v[i] = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+            SpdStore(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
+        }
+        SpdWorkgroupShuffleBarrier(); // finish the reads before the next iteration overwrites the intermediate storage
+    }
+
+    if (localInvocationIndex < 64) // publish the four reduced values for the next mip level
+    {
+        SpdStoreIntermediate(x + 0, y + 0, v[0]);
+        SpdStoreIntermediate(x + 8, y + 0, v[1]);
+        SpdStoreIntermediate(x + 0, y + 8, v[2]);
+        SpdStoreIntermediate(x + 8, y + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) // compile-time dispatch between the LDS and the wave-intrinsics implementation
+{
+#ifdef SPD_NO_WAVE_OPERATIONS
+    SpdDownsampleMips_0_1_LDS(x, y, workGroupID, localInvocationIndex, mip, slice);
+#else
+    SpdDownsampleMips_0_1_Intrinsics(x, y, workGroupID, localInvocationIndex, mip, slice);
+#endif
+}
+
+
+void SpdDownsampleMip_2(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) // reduces the intermediate data once more, stores it to output level `mip` and back to the intermediate storage
+{
+#ifdef SPD_NO_WAVE_OPERATIONS
+    if (localInvocationIndex < 64)
+    {
+        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS, try to reduce bank conflicts
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // ...
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        SpdStoreIntermediate(x * 2 + y % 2, y * 2, v); // staggered layout drawn above: every other row, shifted by one column on odd y
+    }
+#else
+    FfxFloat32x4 v = SpdLoadIntermediate(x, y);
+    v        = SpdReduceQuad(v);
+    // quad index 0 stores result
+    if (localInvocationIndex % 4 == 0)
+    {
+        SpdStore(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+        SpdStoreIntermediate(x + (y / 2) % 2, y, v); // slot choice must match the read pattern in SpdDownsampleMip_3 (x * 2 + y % 2, y * 2)
+    }
+#endif
+}
+
+void SpdDownsampleMip_3(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) // next reduction step: reads the layout written by SpdDownsampleMip_2, stores output level `mip` and a sparser intermediate layout
+{
+#ifdef SPD_NO_WAVE_OPERATIONS
+    if (localInvocationIndex < 16)
+    {
+        // x 0 x 0
+        // 0 0 0 0
+        // 0 x 0 x
+        // 0 0 0 0
+        FfxFloat32x4 v =
+            SpdReduceIntermediate(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2));
+        SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS
+        // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
+        // ...
+        // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
+        // ...
+        // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
+        // ...
+        SpdStoreIntermediate(x * 4 + y, y * 4, v); // every fourth row, shifted right by y columns as drawn above
+    }
+#else
+    if (localInvocationIndex < 64)
+    {
+        FfxFloat32x4 v = SpdLoadIntermediate(x * 2 + y % 2, y * 2); // read the staggered slots written by SpdDownsampleMip_2
+        v        = SpdReduceQuad(v);
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStore(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+            SpdStoreIntermediate(x * 2 + y / 2, y * 2, v); // slot choice must match the read pattern in SpdDownsampleMip_4 (x * 4 + y, y * 4)
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMip_4(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice) // next reduction step: leaves four values in intermediate row 0 for SpdDownsampleMip_5
+{
+#ifdef SPD_NO_WAVE_OPERATIONS
+    if (localInvocationIndex < 4)
+    {
+        // x 0 0 0 x 0 0 0
+        // ...
+        // 0 x 0 0 0 x 0 0
+        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
+                                         FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
+                                         FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
+                                         FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4));
+        SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS
+        // x x x x 0 ...
+        // 0 ...
+        SpdStoreIntermediate(x + y * 2, 0, v); // pack the 2x2 results into the first four slots of row 0
+    }
+#else
+    if (localInvocationIndex < 16)
+    {
+        FfxFloat32x4 v = SpdLoadIntermediate(x * 4 + y, y * 4); // read the slots written by SpdDownsampleMip_3
+        v        = SpdReduceQuad(v);
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStore(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+            SpdStoreIntermediate(x / 2 + y, 0, v); // results land in row 0, which SpdDownsampleMip_5 reads
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMip_5(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Reduces the four values in row 0 of intermediate storage to one texel and stores it at workGroupID.xy in `mip` of `slice`.
+#ifdef SPD_NO_WAVE_OPERATIONS
+    if (localInvocationIndex < 1) // a single invocation performs the whole reduction
+    {
+        // x x x x 0 ...
+        // 0 ...
+        FfxFloat32x4 v = SpdReduceIntermediate(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
+        SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice);
+    }
+#else
+    if (localInvocationIndex < 4) // invocations 0..3 each load one of the four values
+    {
+        FfxFloat32x4 v = SpdLoadIntermediate(localInvocationIndex, 0);
+        v        = SpdReduceQuad(v); // combine the values held by the four invocations of this quad
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStore(FfxInt32x2(workGroupID.xy), v, mip, slice);
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMips_6_7(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
+{ // Writes four texels of mip 6 per invocation and, when more than 7 mips are requested, one texel of mip 7.
+    FfxInt32x2   tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); // base coordinate passed to SpdReduceLoad4
+    FfxInt32x2   pix = FfxInt32x2(x * 2 + 0, y * 2 + 0); // destination texel in mip 6
+    FfxFloat32x4 v0  = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v0, 6, slice);
+
+    tex       = FfxInt32x2(x * 4 + 2, y * 4 + 0);
+    pix       = FfxInt32x2(x * 2 + 1, y * 2 + 0);
+    FfxFloat32x4 v1 = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v1, 6, slice);
+
+    tex       = FfxInt32x2(x * 4 + 0, y * 4 + 2);
+    pix       = FfxInt32x2(x * 2 + 0, y * 2 + 1);
+    FfxFloat32x4 v2 = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v2, 6, slice);
+
+    tex       = FfxInt32x2(x * 4 + 2, y * 4 + 2);
+    pix       = FfxInt32x2(x * 2 + 1, y * 2 + 1);
+    FfxFloat32x4 v3 = SpdReduceLoad4(tex, slice);
+    SpdStore(pix, v3, 6, slice);
+
+    if (mips <= 7) // mip 7 was not requested
+        return;
+    // no barrier needed, working on values only from the same thread
+
+    FfxFloat32x4 v = SpdReduce4(v0, v1, v2, v3);
+    SpdStore(FfxInt32x2(x, y), v, 7, slice);
+    SpdStoreIntermediate(x, y, v); // kept in intermediate storage for the passes that follow
+}
+
+void SpdDownsampleNextFour(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
+{ // Runs up to four consecutive passes writing mips baseMip..baseMip+3; returns as soon as the next mip index reaches `mips`.
+    if (mips <= baseMip)
+        return;
+    SpdWorkgroupShuffleBarrier(); // issued before every pass, which reads intermediate values stored by the previous one
+    SpdDownsampleMip_2(x, y, workGroupID, localInvocationIndex, baseMip, slice);
+
+    if (mips <= baseMip + 1)
+        return;
+    SpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_3(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);
+
+    if (mips <= baseMip + 2)
+        return;
+    SpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_4(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);
+
+    if (mips <= baseMip + 3)
+        return;
+    SpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_5(workGroupID, localInvocationIndex, baseMip + 3, slice);
+}
+
+void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
+{ // Entry point: writes mips 0..5 for this workgroup's tile, then mips 6 and up in the one workgroup that does not exit early.
+    FfxUInt32x2        sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); // remap the low 6 index bits to a 2D position
+    FfxUInt32 x      = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); // bit 6 of the index adds an offset of 8 in x
+    FfxUInt32 y      = sub_xy.y + 8 * ((localInvocationIndex >> 7)); // index bits 7 and up add multiples of 8 in y
+    SpdDownsampleMips_0_1(x, y, workGroupID, localInvocationIndex, mips, slice);
+
+    SpdDownsampleNextFour(x, y, workGroupID, localInvocationIndex, 2, mips, slice);
+
+    if (mips <= 6) // nothing beyond mip 5 was requested
+        return;
+
+    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) // presumably true for every workgroup except the last to finish - confirm in the callback
+        return;
+
+    SpdResetAtomicCounter(slice);
+
+    // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
+    SpdDownsampleMips_6_7(x, y, mips, slice);
+
+    SpdDownsampleNextFour(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); // workgroup ID (0, 0) is passed for the remaining passes
+}
+
+void SpdDownsample(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
+{ // Overload that adds workGroupOffset to workGroupID and forwards to the five-argument SpdDownsample.
+    SpdDownsample(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice);
+}
+
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+//==============================================================================================================================
+//                                                       PACKED VERSION
+//==============================================================================================================================
+
+#if FFX_HALF
+
+#ifdef FFX_GLSL
+#extension GL_EXT_shader_subgroup_extended_types_float16:require
+#endif
+
+FfxFloat16x4 SpdReduceQuadH(FfxFloat16x4 v)
+{ // Half-precision quad reduction: gathers the values of the other three quad lanes and combines all four with SpdReduce4H.
+#if defined(FFX_GLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
+    FfxFloat16x4 v0 = v;
+    FfxFloat16x4 v1 = subgroupQuadSwapHorizontal(v);
+    FfxFloat16x4 v2 = subgroupQuadSwapVertical(v);
+    FfxFloat16x4 v3 = subgroupQuadSwapDiagonal(v);
+    return SpdReduce4H(v0, v1, v2, v3);
+#elif defined(FFX_HLSL) && !defined(SPD_NO_WAVE_OPERATIONS)
+    // requires SM6.0
+    FfxUInt32 quad = WaveGetLaneIndex() & (~0x3); // lane index with the two low bits cleared, i.e. first lane of this group of four
+    FfxFloat16x4        v0   = v;
+    FfxFloat16x4        v1   = WaveReadLaneAt(v, quad | 1);
+    FfxFloat16x4        v2   = WaveReadLaneAt(v, quad | 2);
+    FfxFloat16x4        v3   = WaveReadLaneAt(v, quad | 3);
+    return SpdReduce4H(v0, v1, v2, v3);
+/*
+    // if SM6.0 is not available, you can use the AMD shader intrinsics
+    // the AMD shader intrinsics are available in AMD GPU Services (AGS) library:
+    // https://gpuopen.com/amd-gpu-services-ags-library/
+    // works for DX11
+    FfxFloat16x4 v0 = v;
+    FfxFloat16x4 v1;
+    v1.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    v1.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX1);
+    FfxFloat16x4 v2;
+    v2.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    v2.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_SwapX2);
+    FfxFloat16x4 v3;
+    v3.x = AmdExtD3DShaderIntrinsics_SwizzleF(v.x, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.y = AmdExtD3DShaderIntrinsics_SwizzleF(v.y, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.z = AmdExtD3DShaderIntrinsics_SwizzleF(v.z, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    v3.w = AmdExtD3DShaderIntrinsics_SwizzleF(v.w, AmdExtD3DShaderIntrinsicsSwizzle_ReverseX4);
+    return SpdReduce4H(v0, v1, v2, v3);
+    */
+#endif
+    return FfxFloat16x4(0.0, 0.0, 0.0, 0.0); // only reached when no wave path above is compiled in (e.g. SPD_NO_WAVE_OPERATIONS)
+}
+
+FfxFloat16x4 SpdReduceIntermediateH(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3)
+{ // Fetches the four intermediate values addressed by i0..i3 and folds them into one with SpdReduce4H.
+    FfxFloat16x4 texel0 = SpdLoadIntermediateH(i0.x, i0.y);
+    FfxFloat16x4 texel1 = SpdLoadIntermediateH(i1.x, i1.y);
+    FfxFloat16x4 texel2 = SpdLoadIntermediateH(i2.x, i2.y);
+    FfxFloat16x4 texel3 = SpdLoadIntermediateH(i3.x, i3.y);
+    return SpdReduce4H(texel0, texel1, texel2, texel3);
+}
+
+FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
+{ // Loads the four texels at i0..i3 of `slice` through SpdLoadH and folds them into one with SpdReduce4H.
+    FfxFloat16x4 texel0 = SpdLoadH(FfxInt32x2(i0), slice);
+    FfxFloat16x4 texel1 = SpdLoadH(FfxInt32x2(i1), slice);
+    FfxFloat16x4 texel2 = SpdLoadH(FfxInt32x2(i2), slice);
+    FfxFloat16x4 texel3 = SpdLoadH(FfxInt32x2(i3), slice);
+    return SpdReduce4H(texel0, texel1, texel2, texel3);
+}
+
+FfxFloat16x4 SpdReduceLoad4H(FfxUInt32x2 base, FfxUInt32 slice)
+{ // Reduces the 2x2 footprint whose first texel is `base`, visiting (0,0), (0,1), (1,0), (1,1) in that order.
+    return SpdReduceLoad4H(base, base + FfxUInt32x2(0, 1), base + FfxUInt32x2(1, 0), base + FfxUInt32x2(1, 1), slice);
+}
+
+FfxFloat16x4 SpdReduceLoadSourceImage4H(FfxUInt32x2 i0, FfxUInt32x2 i1, FfxUInt32x2 i2, FfxUInt32x2 i3, FfxUInt32 slice)
+{ // Loads the four source texels at i0..i3 of `slice` through SpdLoadSourceImageH and folds them with SpdReduce4H.
+    FfxFloat16x4 texel0 = SpdLoadSourceImageH(FfxInt32x2(i0), slice);
+    FfxFloat16x4 texel1 = SpdLoadSourceImageH(FfxInt32x2(i1), slice);
+    FfxFloat16x4 texel2 = SpdLoadSourceImageH(FfxInt32x2(i2), slice);
+    FfxFloat16x4 texel3 = SpdLoadSourceImageH(FfxInt32x2(i3), slice);
+    return SpdReduce4H(texel0, texel1, texel2, texel3);
+}
+
+FfxFloat16x4 SpdReduceLoadSourceImageH(FfxUInt32x2 base, FfxUInt32 slice)
+{ // Without SPD_LINEAR_SAMPLER the 2x2 footprint at `base` is loaded and reduced explicitly; with it a single load is issued.
+#ifndef SPD_LINEAR_SAMPLER
+    return SpdReduceLoadSourceImage4H(base, base + FfxUInt32x2(0, 1), base + FfxUInt32x2(1, 0), base + FfxUInt32x2(1, 1), slice);
+#else
+    return SpdLoadSourceImageH(FfxInt32x2(base), slice);
+#endif
+}
+
+void SpdDownsampleMips_0_1_IntrinsicsH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
+{ // Wave-operation path: each invocation writes four texels of mip 0, then quads are reduced to write mip 1 and intermediate storage.
+    FfxFloat16x4 v[4]; // one value per 16x16 quadrant of this workgroup's 32x32 mip-0 tile
+
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); // source coordinate
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); // destination texel in mip 0
+    v[0]     = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[0], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[1], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[2], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[3], 0, slice);
+
+    if (mips <= 1) // only mip 0 was requested
+        return;
+
+    v[0] = SpdReduceQuadH(v[0]);
+    v[1] = SpdReduceQuadH(v[1]);
+    v[2] = SpdReduceQuadH(v[2]);
+    v[3] = SpdReduceQuadH(v[3]);
+
+    if ((localInvocationIndex % 4) == 0) // one invocation out of every four stores the reduced results
+    {
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2), v[0], 1, slice);
+        SpdStoreIntermediateH(x / 2, y / 2, v[0]);
+
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2), v[1], 1, slice);
+        SpdStoreIntermediateH(x / 2 + 8, y / 2, v[1]);
+
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2, y / 2 + 8), v[2], 1, slice);
+        SpdStoreIntermediateH(x / 2, y / 2 + 8, v[2]);
+
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x / 2 + 8, y / 2 + 8), v[3], 1, slice);
+        SpdStoreIntermediateH(x / 2 + 8, y / 2 + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1_LDSH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
+{ // Path without wave operations: writes mip 0 as above, then produces mip 1 by staging each quadrant through intermediate storage.
+    FfxFloat16x4 v[4]; // one value per 16x16 quadrant of this workgroup's 32x32 mip-0 tile
+
+    FfxInt32x2 tex = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2); // source coordinate
+    FfxInt32x2 pix = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y); // destination texel in mip 0
+    v[0]     = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[0], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y);
+    v[1] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[1], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x, y + 16);
+    v[2] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[2], 0, slice);
+
+    tex  = FfxInt32x2(workGroupID.xy * 64) + FfxInt32x2(x * 2 + 32, y * 2 + 32);
+    pix  = FfxInt32x2(workGroupID.xy * 32) + FfxInt32x2(x + 16, y + 16);
+    v[3] = SpdReduceLoadSourceImageH(tex, slice);
+    SpdStoreH(pix, v[3], 0, slice);
+
+    if (mips <= 1) // only mip 0 was requested
+        return;
+
+    for (FfxInt32 i = 0; i < 4; i++) // one quadrant per iteration, reusing the same intermediate storage each time
+    {
+        SpdStoreIntermediateH(x, y, v[i]);
+        SpdWorkgroupShuffleBarrier(); // the stores above must complete before neighbouring values are read
+        if (localInvocationIndex < 64)
+        {
+            v[i] = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+            SpdStoreH(FfxInt32x2(workGroupID.xy * 16) + FfxInt32x2(x + (i % 2) * 8, y + (i / 2) * 8), v[i], 1, slice);
+        }
+        SpdWorkgroupShuffleBarrier(); // the reads above must complete before the next iteration overwrites the storage
+    }
+
+    if (localInvocationIndex < 64) // invocations 0..63 hold the mip-1 results and publish them for the next pass
+    {
+        SpdStoreIntermediateH(x + 0, y + 0, v[0]);
+        SpdStoreIntermediateH(x + 8, y + 0, v[1]);
+        SpdStoreIntermediateH(x + 0, y + 8, v[2]);
+        SpdStoreIntermediateH(x + 8, y + 8, v[3]);
+    }
+}
+
+void SpdDownsampleMips_0_1H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 slice)
+{ // Compile-time dispatch: wave-intrinsics variant by default, LDS variant when SPD_NO_WAVE_OPERATIONS is defined.
+#ifndef SPD_NO_WAVE_OPERATIONS
+    SpdDownsampleMips_0_1_IntrinsicsH(x, y, workGroupID, localInvocationIndex, mips, slice);
+#else
+    SpdDownsampleMips_0_1_LDSH(x, y, workGroupID, localInvocationIndex, mips, slice);
+#endif
+}
+
+
+void SpdDownsampleMip_2H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Half-precision pass: reduces the intermediate values, stores the result to `mip` of `slice`, and writes it back to intermediate storage.
+#ifdef SPD_NO_WAVE_OPERATIONS
+    if (localInvocationIndex < 64) // only invocations 0..63 take part in this pass
+    {
+        FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 2 + 0, y * 2 + 0), FfxUInt32x2(x * 2 + 1, y * 2 + 0), FfxUInt32x2(x * 2 + 0, y * 2 + 1), FfxUInt32x2(x * 2 + 1, y * 2 + 1));
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS, try to reduce bank conflicts
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0 x
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        // ...
+        // x 0 x 0 x 0 x 0 x 0 x 0 x 0 x 0
+        SpdStoreIntermediateH(x * 2 + y % 2, y * 2, v);
+    }
+#else
+    FfxFloat16x4 v = SpdLoadIntermediateH(x, y); // every invocation takes part here; there is no index guard on this path
+    v     = SpdReduceQuadH(v); // combine the values held by the four invocations of this quad
+    // quad index 0 stores result
+    if (localInvocationIndex % 4 == 0)
+    {
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 8) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+        SpdStoreIntermediateH(x + (y / 2) % 2, y, v);
+    }
+#endif
+}
+
+void SpdDownsampleMip_3H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Half-precision pass: reduces the intermediate values once more, stores to `mip` of `slice`, and writes back to intermediate storage.
+#ifdef SPD_NO_WAVE_OPERATIONS
+    if (localInvocationIndex < 16) // only invocations 0..15 take part in this pass
+    {
+        // x 0 x 0
+        // 0 0 0 0
+        // 0 x 0 x
+        // 0 0 0 0
+        FfxFloat16x4 v =
+            SpdReduceIntermediateH(FfxUInt32x2(x * 4 + 0 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 2 + 0, y * 4 + 0), FfxUInt32x2(x * 4 + 0 + 1, y * 4 + 2), FfxUInt32x2(x * 4 + 2 + 1, y * 4 + 2));
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS
+        // x 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+        // 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0 0
+        // ...
+        // 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x 0
+        // ...
+        // 0 0 0 x 0 0 0 x 0 0 0 x 0 0 0 x
+        // ...
+        SpdStoreIntermediateH(x * 4 + y, y * 4, v);
+    }
+#else
+    if (localInvocationIndex < 64) // only invocations 0..63 take part in this pass
+    {
+        FfxFloat16x4 v = SpdLoadIntermediateH(x * 2 + y % 2, y * 2);
+        v     = SpdReduceQuadH(v); // combine the values held by the four invocations of this quad
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStoreH(FfxInt32x2(workGroupID.xy * 4) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+            SpdStoreIntermediateH(x * 2 + y / 2, y * 2, v);
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMip_4H(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Half-precision pass: reduces the intermediate values again, stores to `mip` of `slice`, and packs the result into row 0 of intermediate storage.
+#ifdef SPD_NO_WAVE_OPERATIONS
+    if (localInvocationIndex < 4) // only invocations 0..3 take part in this pass
+    {
+        // x 0 0 0 x 0 0 0
+        // ...
+        // 0 x 0 0 0 x 0 0
+        FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(x * 8 + 0 + 0 + y * 2, y * 8 + 0),
+                                       FfxUInt32x2(x * 8 + 4 + 0 + y * 2, y * 8 + 0),
+                                       FfxUInt32x2(x * 8 + 0 + 1 + y * 2, y * 8 + 4),
+                                       FfxUInt32x2(x * 8 + 4 + 1 + y * 2, y * 8 + 4));
+        SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x, y), v, mip, slice);
+        // store to LDS
+        // x x x x 0 ...
+        // 0 ...
+        SpdStoreIntermediateH(x + y * 2, 0, v);
+    }
+#else
+    if (localInvocationIndex < 16) // only invocations 0..15 take part in this pass
+    {
+        FfxFloat16x4 v = SpdLoadIntermediateH(x * 4 + y, y * 4);
+        v     = SpdReduceQuadH(v); // combine the values held by the four invocations of this quad
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStoreH(FfxInt32x2(workGroupID.xy * 2) + FfxInt32x2(x / 2, y / 2), v, mip, slice);
+            SpdStoreIntermediateH(x / 2 + y, 0, v);
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMip_5H(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mip, FfxUInt32 slice)
+{ // Half-precision pass: reduces the four values in row 0 of intermediate storage to one texel stored at workGroupID.xy in `mip`.
+#ifdef SPD_NO_WAVE_OPERATIONS
+    if (localInvocationIndex < 1) // a single invocation performs the whole reduction
+    {
+        // x x x x 0 ...
+        // 0 ...
+        FfxFloat16x4 v = SpdReduceIntermediateH(FfxUInt32x2(0, 0), FfxUInt32x2(1, 0), FfxUInt32x2(2, 0), FfxUInt32x2(3, 0));
+        SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice);
+    }
+#else
+    if (localInvocationIndex < 4) // invocations 0..3 each load one of the four values
+    {
+        FfxFloat16x4 v = SpdLoadIntermediateH(localInvocationIndex, 0);
+        v     = SpdReduceQuadH(v); // combine the values held by the four invocations of this quad
+        // quad index 0 stores result
+        if (localInvocationIndex % 4 == 0)
+        {
+            SpdStoreH(FfxInt32x2(workGroupID.xy), v, mip, slice);
+        }
+    }
+#endif
+}
+
+void SpdDownsampleMips_6_7H(FfxUInt32 x, FfxUInt32 y, FfxUInt32 mips, FfxUInt32 slice)
+{ // Half-precision pass: writes four texels of mip 6 per invocation and, when at least 8 mips are requested, one texel of mip 7.
+    FfxInt32x2 tex = FfxInt32x2(x * 4 + 0, y * 4 + 0); // base coordinate passed to SpdReduceLoad4H
+    FfxInt32x2 pix = FfxInt32x2(x * 2 + 0, y * 2 + 0); // destination texel in mip 6
+    FfxFloat16x4  v0  = SpdReduceLoad4H(tex, slice);
+    SpdStoreH(pix, v0, 6, slice);
+
+    tex    = FfxInt32x2(x * 4 + 2, y * 4 + 0);
+    pix    = FfxInt32x2(x * 2 + 1, y * 2 + 0);
+    FfxFloat16x4 v1 = SpdReduceLoad4H(tex, slice);
+    SpdStoreH(pix, v1, 6, slice);
+
+    tex    = FfxInt32x2(x * 4 + 0, y * 4 + 2);
+    pix    = FfxInt32x2(x * 2 + 0, y * 2 + 1);
+    FfxFloat16x4 v2 = SpdReduceLoad4H(tex, slice);
+    SpdStoreH(pix, v2, 6, slice);
+
+    tex    = FfxInt32x2(x * 4 + 2, y * 4 + 2);
+    pix    = FfxInt32x2(x * 2 + 1, y * 2 + 1);
+    FfxFloat16x4 v3 = SpdReduceLoad4H(tex, slice);
+    SpdStoreH(pix, v3, 6, slice);
+
+    if (mips < 8) // mip 7 was not requested
+        return;
+    // no barrier needed, working on values only from the same thread
+
+    FfxFloat16x4 v = SpdReduce4H(v0, v1, v2, v3);
+    SpdStoreH(FfxInt32x2(x, y), v, 7, slice);
+    SpdStoreIntermediateH(x, y, v); // kept in intermediate storage for the passes that follow
+}
+
+void SpdDownsampleNextFourH(FfxUInt32 x, FfxUInt32 y, FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 baseMip, FfxUInt32 mips, FfxUInt32 slice)
+{ // Half-precision variant: runs up to four passes writing mips baseMip..baseMip+3; returns once the next mip index reaches `mips`.
+    if (mips <= baseMip)
+        return;
+    SpdWorkgroupShuffleBarrier(); // issued before every pass, which reads intermediate values stored by the previous one
+    SpdDownsampleMip_2H(x, y, workGroupID, localInvocationIndex, baseMip, slice);
+
+    if (mips <= baseMip + 1)
+        return;
+    SpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_3H(x, y, workGroupID, localInvocationIndex, baseMip + 1, slice);
+
+    if (mips <= baseMip + 2)
+        return;
+    SpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_4H(x, y, workGroupID, localInvocationIndex, baseMip + 2, slice);
+
+    if (mips <= baseMip + 3)
+        return;
+    SpdWorkgroupShuffleBarrier();
+    SpdDownsampleMip_5H(workGroupID, localInvocationIndex, baseMip + 3, slice);
+}
+
+void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice)
+{ // Half-precision entry point: writes mips 0..5 for this workgroup's tile, then mips 6 and up in the one workgroup that does not exit early.
+    FfxUInt32x2        sub_xy = ffxRemapForWaveReduction(localInvocationIndex % 64); // remap the low 6 index bits to a 2D position
+    FfxUInt32 x      = sub_xy.x + 8 * ((localInvocationIndex >> 6) % 2); // bit 6 of the index adds an offset of 8 in x
+    FfxUInt32 y      = sub_xy.y + 8 * ((localInvocationIndex >> 7)); // index bits 7 and up add multiples of 8 in y
+
+    SpdDownsampleMips_0_1H(x, y, workGroupID, localInvocationIndex, mips, slice);
+
+    SpdDownsampleNextFourH(x, y, workGroupID, localInvocationIndex, 2, mips, slice);
+
+    if (mips < 7) // nothing beyond mip 5 was requested
+        return;
+
+    if (SpdExitWorkgroup(numWorkGroups, localInvocationIndex, slice)) // presumably true for every workgroup except the last to finish - confirm in the callback
+        return;
+
+    SpdResetAtomicCounter(slice);
+
+    // After mip 6 there is only a single workgroup left that downsamples the remaining up to 64x64 texels.
+    SpdDownsampleMips_6_7H(x, y, mips, slice);
+
+    SpdDownsampleNextFourH(x, y, FfxUInt32x2(0, 0), localInvocationIndex, 8, mips, slice); // workgroup ID (0, 0) is passed for the remaining passes
+}
+
+void SpdDownsampleH(FfxUInt32x2 workGroupID, FfxUInt32 localInvocationIndex, FfxUInt32 mips, FfxUInt32 numWorkGroups, FfxUInt32 slice, FfxUInt32x2 workGroupOffset)
+{ // Overload that adds workGroupOffset to workGroupID and forwards to the five-argument SpdDownsampleH.
+    SpdDownsampleH(workGroupID + workGroupOffset, localInvocationIndex, mips, numWorkGroups, slice);
+}
+
+#endif // #if FFX_HALF
+#endif // #ifdef FFX_GPU
diff --git a/thirdparty/angle/LICENSE b/thirdparty/angle/LICENSE
new file mode 100644
index 0000000000..0f65fd60fd
--- /dev/null
+++ b/thirdparty/angle/LICENSE
@@ -0,0 +1,32 @@
+// Copyright 2018 The ANGLE Project Authors.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//
+//     Redistributions of source code must retain the above copyright
+//     notice, this list of conditions and the following disclaimer.
+//
+//     Redistributions in binary form must reproduce the above
+//     copyright notice, this list of conditions and the following
+//     disclaimer in the documentation and/or other materials provided
+//     with the distribution.
+//
+//     Neither the name of TransGaming Inc., Google Inc., 3DLabs Inc.
+//     Ltd., nor the names of their contributors may be used to endorse
+//     or promote products derived from this software without specific
+//     prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+// POSSIBILITY OF SUCH DAMAGE.
diff --git a/thirdparty/angle/include/EGL/egl.h b/thirdparty/angle/include/EGL/egl.h
new file mode 100644
index 0000000000..97d0878cc7
--- /dev/null
+++ b/thirdparty/angle/include/EGL/egl.h
@@ -0,0 +1,342 @@
+#ifndef __egl_h_ /* include guard; closed by the last #endif of this header */
+#define __egl_h_ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+** Copyright 2013-2020 The Khronos Group Inc.
+** SPDX-License-Identifier: Apache-2.0
+**
+** This header is generated from the Khronos EGL XML API Registry.
+** The current version of the Registry, generator scripts
+** used to make the header, and the header can be found at
+**   http://www.khronos.org/registry/egl
+**
+** Khronos $Git commit SHA1: 6fb1daea15 $ on $Git commit date: 2022-05-25 09:41:13 -0600 $
+*/
+
+#include <EGL/eglplatform.h>
+
+#ifndef EGL_EGL_PROTOTYPES
+#define EGL_EGL_PROTOTYPES 1 /* default when the includer set nothing: the "#if EGL_EGL_PROTOTYPES" blocks below emit the egl* prototypes */
+#endif /* EGL_EGL_PROTOTYPES */
+
+/* Generated on date 20220525 */
+
+/* Generated C header for:
+ * API: egl
+ * Versions considered: .*
+ * Versions emitted: .*
+ * Default extensions included: None
+ * Additional extensions included: _nomatch_^
+ * Extensions removed: _nomatch_^
+ */
+
+#ifndef EGL_VERSION_1_0 /* section guard: the whole EGL 1.0 section is skipped when this macro is already defined */
+#define EGL_VERSION_1_0 1
+typedef unsigned int EGLBoolean;
+typedef void *EGLDisplay;
+#include <KHR/khrplatform.h> /* presumably supplies the khronos_* types used later (e.g. by EGLTime in the EGL 1.5 section) -- confirm */
+#include <EGL/eglplatform.h> /* repeat of the include near the top of this header */
+typedef void *EGLConfig;
+typedef void *EGLSurface;
+typedef void *EGLContext;
+typedef void (*__eglMustCastToProperFunctionPointerType)(void); /* return type of eglGetProcAddress below */
+#define EGL_ALPHA_SIZE                    0x3021
+#define EGL_BAD_ACCESS                    0x3002
+#define EGL_BAD_ALLOC                     0x3003
+#define EGL_BAD_ATTRIBUTE                 0x3004
+#define EGL_BAD_CONFIG                    0x3005
+#define EGL_BAD_CONTEXT                   0x3006
+#define EGL_BAD_CURRENT_SURFACE           0x3007
+#define EGL_BAD_DISPLAY                   0x3008
+#define EGL_BAD_MATCH                     0x3009
+#define EGL_BAD_NATIVE_PIXMAP             0x300A
+#define EGL_BAD_NATIVE_WINDOW             0x300B
+#define EGL_BAD_PARAMETER                 0x300C
+#define EGL_BAD_SURFACE                   0x300D
+#define EGL_BLUE_SIZE                     0x3022
+#define EGL_BUFFER_SIZE                   0x3020
+#define EGL_CONFIG_CAVEAT                 0x3027
+#define EGL_CONFIG_ID                     0x3028
+#define EGL_CORE_NATIVE_ENGINE            0x305B
+#define EGL_DEPTH_SIZE                    0x3025
+#define EGL_DONT_CARE                     EGL_CAST(EGLint,-1) /* EGL_CAST and EGLint are not defined in this header; presumably from eglplatform.h */
+#define EGL_DRAW                          0x3059
+#define EGL_EXTENSIONS                    0x3055
+#define EGL_FALSE                         0
+#define EGL_GREEN_SIZE                    0x3023
+#define EGL_HEIGHT                        0x3056
+#define EGL_LARGEST_PBUFFER               0x3058
+#define EGL_LEVEL                         0x3029
+#define EGL_MAX_PBUFFER_HEIGHT            0x302A
+#define EGL_MAX_PBUFFER_PIXELS            0x302B
+#define EGL_MAX_PBUFFER_WIDTH             0x302C
+#define EGL_NATIVE_RENDERABLE             0x302D
+#define EGL_NATIVE_VISUAL_ID              0x302E
+#define EGL_NATIVE_VISUAL_TYPE            0x302F
+#define EGL_NONE                          0x3038
+#define EGL_NON_CONFORMANT_CONFIG         0x3051
+#define EGL_NOT_INITIALIZED               0x3001
+#define EGL_NO_CONTEXT                    EGL_CAST(EGLContext,0)
+#define EGL_NO_DISPLAY                    EGL_CAST(EGLDisplay,0)
+#define EGL_NO_SURFACE                    EGL_CAST(EGLSurface,0)
+#define EGL_PBUFFER_BIT                   0x0001
+#define EGL_PIXMAP_BIT                    0x0002
+#define EGL_READ                          0x305A
+#define EGL_RED_SIZE                      0x3024
+#define EGL_SAMPLES                       0x3031
+#define EGL_SAMPLE_BUFFERS                0x3032
+#define EGL_SLOW_CONFIG                   0x3050
+#define EGL_STENCIL_SIZE                  0x3026
+#define EGL_SUCCESS                       0x3000
+#define EGL_SURFACE_TYPE                  0x3033
+#define EGL_TRANSPARENT_BLUE_VALUE        0x3035
+#define EGL_TRANSPARENT_GREEN_VALUE       0x3036
+#define EGL_TRANSPARENT_RED_VALUE         0x3037
+#define EGL_TRANSPARENT_RGB               0x3052
+#define EGL_TRANSPARENT_TYPE              0x3034
+#define EGL_TRUE                          1
+#define EGL_VENDOR                        0x3053
+#define EGL_VERSION                       0x3054
+#define EGL_WIDTH                         0x3057
+#define EGL_WINDOW_BIT                    0x0004
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCHOOSECONFIGPROC) (EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOPYBUFFERSPROC) (EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target);
+typedef EGLContext (EGLAPIENTRYP PFNEGLCREATECONTEXTPROC) (EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list);
+typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPBUFFERSURFACEPROC) (EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list);
+typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPIXMAPSURFACEPROC) (EGLDisplay dpy, EGLConfig config, EGLNativePixmapType pixmap, const EGLint *attrib_list);
+typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEWINDOWSURFACEPROC) (EGLDisplay dpy, EGLConfig config, EGLNativeWindowType win, const EGLint *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYCONTEXTPROC) (EGLDisplay dpy, EGLContext ctx);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSURFACEPROC) (EGLDisplay dpy, EGLSurface surface);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETCONFIGATTRIBPROC) (EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint *value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETCONFIGSPROC) (EGLDisplay dpy, EGLConfig *configs, EGLint config_size, EGLint *num_config);
+typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETCURRENTDISPLAYPROC) (void);
+typedef EGLSurface (EGLAPIENTRYP PFNEGLGETCURRENTSURFACEPROC) (EGLint readdraw);
+typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETDISPLAYPROC) (EGLNativeDisplayType display_id);
+typedef EGLint (EGLAPIENTRYP PFNEGLGETERRORPROC) (void);
+typedef __eglMustCastToProperFunctionPointerType (EGLAPIENTRYP PFNEGLGETPROCADDRESSPROC) (const char *procname);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLINITIALIZEPROC) (EGLDisplay dpy, EGLint *major, EGLint *minor);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLMAKECURRENTPROC) (EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYCONTEXTPROC) (EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint *value);
+typedef const char *(EGLAPIENTRYP PFNEGLQUERYSTRINGPROC) (EGLDisplay dpy, EGLint name);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint *value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSPROC) (EGLDisplay dpy, EGLSurface surface);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLTERMINATEPROC) (EGLDisplay dpy);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLWAITGLPROC) (void);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLWAITNATIVEPROC) (EGLint engine);
+#if EGL_EGL_PROTOTYPES /* direct prototypes are optional; the PFNEGL...PROC typedefs above are declared either way */
+EGLAPI EGLBoolean EGLAPIENTRY eglChooseConfig (EGLDisplay dpy, const EGLint *attrib_list, EGLConfig *configs, EGLint config_size, EGLint *num_config);
+EGLAPI EGLBoolean EGLAPIENTRY eglCopyBuffers (EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target);
+EGLAPI EGLContext EGLAPIENTRY eglCreateContext (EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferSurface (EGLDisplay dpy, EGLConfig config, const EGLint *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurface (EGLDisplay dpy, EGLConfig config, EGLNativePixmapType pixmap, const EGLint *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreateWindowSurface (EGLDisplay dpy, EGLConfig config, EGLNativeWindowType win, const EGLint *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroyContext (EGLDisplay dpy, EGLContext ctx);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroySurface (EGLDisplay dpy, EGLSurface surface);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigAttrib (EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint *value);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetConfigs (EGLDisplay dpy, EGLConfig *configs, EGLint config_size, EGLint *num_config);
+EGLAPI EGLDisplay EGLAPIENTRY eglGetCurrentDisplay (void);
+EGLAPI EGLSurface EGLAPIENTRY eglGetCurrentSurface (EGLint readdraw);
+EGLAPI EGLDisplay EGLAPIENTRY eglGetDisplay (EGLNativeDisplayType display_id);
+EGLAPI EGLint EGLAPIENTRY eglGetError (void);
+EGLAPI __eglMustCastToProperFunctionPointerType EGLAPIENTRY eglGetProcAddress (const char *procname);
+EGLAPI EGLBoolean EGLAPIENTRY eglInitialize (EGLDisplay dpy, EGLint *major, EGLint *minor);
+EGLAPI EGLBoolean EGLAPIENTRY eglMakeCurrent (EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryContext (EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint *value);
+EGLAPI const char *EGLAPIENTRY eglQueryString (EGLDisplay dpy, EGLint name);
+EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint *value);
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffers (EGLDisplay dpy, EGLSurface surface);
+EGLAPI EGLBoolean EGLAPIENTRY eglTerminate (EGLDisplay dpy);
+EGLAPI EGLBoolean EGLAPIENTRY eglWaitGL (void);
+EGLAPI EGLBoolean EGLAPIENTRY eglWaitNative (EGLint engine);
+#endif /* EGL_EGL_PROTOTYPES */
+#endif /* EGL_VERSION_1_0 */
+
+#ifndef EGL_VERSION_1_1 /* section guard: skipped when EGL_VERSION_1_1 is already defined */
+#define EGL_VERSION_1_1 1
+#define EGL_BACK_BUFFER                   0x3084
+#define EGL_BIND_TO_TEXTURE_RGB           0x3039
+#define EGL_BIND_TO_TEXTURE_RGBA          0x303A
+#define EGL_CONTEXT_LOST                  0x300E
+#define EGL_MIN_SWAP_INTERVAL             0x303B
+#define EGL_MAX_SWAP_INTERVAL             0x303C
+#define EGL_MIPMAP_TEXTURE                0x3082
+#define EGL_MIPMAP_LEVEL                  0x3083
+#define EGL_NO_TEXTURE                    0x305C
+#define EGL_TEXTURE_2D                    0x305F
+#define EGL_TEXTURE_FORMAT                0x3080
+#define EGL_TEXTURE_RGB                   0x305D
+#define EGL_TEXTURE_RGBA                  0x305E
+#define EGL_TEXTURE_TARGET                0x3081
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLBINDTEXIMAGEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint buffer);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLRELEASETEXIMAGEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint buffer);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSURFACEATTRIBPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPINTERVALPROC) (EGLDisplay dpy, EGLint interval);
+#if EGL_EGL_PROTOTYPES /* direct prototypes are optional; the function-pointer typedefs above are declared either way */
+EGLAPI EGLBoolean EGLAPIENTRY eglBindTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer);
+EGLAPI EGLBoolean EGLAPIENTRY eglReleaseTexImage (EGLDisplay dpy, EGLSurface surface, EGLint buffer);
+EGLAPI EGLBoolean EGLAPIENTRY eglSurfaceAttrib (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value);
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapInterval (EGLDisplay dpy, EGLint interval);
+#endif /* EGL_EGL_PROTOTYPES */
+#endif /* EGL_VERSION_1_1 */
+
+#ifndef EGL_VERSION_1_2 /* section guard: skipped when EGL_VERSION_1_2 is already defined */
+#define EGL_VERSION_1_2 1
+typedef unsigned int EGLenum;
+typedef void *EGLClientBuffer;
+#define EGL_ALPHA_FORMAT                  0x3088
+#define EGL_ALPHA_FORMAT_NONPRE           0x308B
+#define EGL_ALPHA_FORMAT_PRE              0x308C
+#define EGL_ALPHA_MASK_SIZE               0x303E
+#define EGL_BUFFER_PRESERVED              0x3094
+#define EGL_BUFFER_DESTROYED              0x3095
+#define EGL_CLIENT_APIS                   0x308D
+#define EGL_COLORSPACE                    0x3087
+#define EGL_COLORSPACE_sRGB               0x3089
+#define EGL_COLORSPACE_LINEAR             0x308A
+#define EGL_COLOR_BUFFER_TYPE             0x303F
+#define EGL_CONTEXT_CLIENT_TYPE           0x3097
+#define EGL_DISPLAY_SCALING               10000
+#define EGL_HORIZONTAL_RESOLUTION         0x3090
+#define EGL_LUMINANCE_BUFFER              0x308F
+#define EGL_LUMINANCE_SIZE                0x303D
+#define EGL_OPENGL_ES_BIT                 0x0001
+#define EGL_OPENVG_BIT                    0x0002
+#define EGL_OPENGL_ES_API                 0x30A0
+#define EGL_OPENVG_API                    0x30A1
+#define EGL_OPENVG_IMAGE                  0x3096
+#define EGL_PIXEL_ASPECT_RATIO            0x3092
+#define EGL_RENDERABLE_TYPE               0x3040
+#define EGL_RENDER_BUFFER                 0x3086
+#define EGL_RGB_BUFFER                    0x308E
+#define EGL_SINGLE_BUFFER                 0x3085
+#define EGL_SWAP_BEHAVIOR                 0x3093
+#define EGL_UNKNOWN                       EGL_CAST(EGLint,-1) /* same expansion as EGL_DONT_CARE in the EGL 1.0 section */
+#define EGL_VERTICAL_RESOLUTION           0x3091
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLBINDAPIPROC) (EGLenum api);
+typedef EGLenum (EGLAPIENTRYP PFNEGLQUERYAPIPROC) (void);
+typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC) (EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, const EGLint *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLRELEASETHREADPROC) (void);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLWAITCLIENTPROC) (void);
+#if EGL_EGL_PROTOTYPES /* direct prototypes are optional; the function-pointer typedefs above are declared either way */
+EGLAPI EGLBoolean EGLAPIENTRY eglBindAPI (EGLenum api);
+EGLAPI EGLenum EGLAPIENTRY eglQueryAPI (void);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePbufferFromClientBuffer (EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, const EGLint *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglReleaseThread (void);
+EGLAPI EGLBoolean EGLAPIENTRY eglWaitClient (void);
+#endif /* EGL_EGL_PROTOTYPES */
+#endif /* EGL_VERSION_1_2 */
+
+#ifndef EGL_VERSION_1_3 /* section guard; this section adds tokens only, no types or entry points */
+#define EGL_VERSION_1_3 1
+#define EGL_CONFORMANT                    0x3042
+#define EGL_CONTEXT_CLIENT_VERSION        0x3098
+#define EGL_MATCH_NATIVE_PIXMAP           0x3041
+#define EGL_OPENGL_ES2_BIT                0x0004
+#define EGL_VG_ALPHA_FORMAT               0x3088 /* same value as EGL_ALPHA_FORMAT (EGL 1.2 section) */
+#define EGL_VG_ALPHA_FORMAT_NONPRE        0x308B /* same value as EGL_ALPHA_FORMAT_NONPRE */
+#define EGL_VG_ALPHA_FORMAT_PRE           0x308C /* same value as EGL_ALPHA_FORMAT_PRE */
+#define EGL_VG_ALPHA_FORMAT_PRE_BIT       0x0040
+#define EGL_VG_COLORSPACE                 0x3087 /* same value as EGL_COLORSPACE (EGL 1.2 section) */
+#define EGL_VG_COLORSPACE_sRGB            0x3089 /* same value as EGL_COLORSPACE_sRGB */
+#define EGL_VG_COLORSPACE_LINEAR          0x308A /* same value as EGL_COLORSPACE_LINEAR */
+#define EGL_VG_COLORSPACE_LINEAR_BIT      0x0020
+#endif /* EGL_VERSION_1_3 */
+
+#ifndef EGL_VERSION_1_4 /* section guard: skipped when EGL_VERSION_1_4 is already defined */
+#define EGL_VERSION_1_4 1
+#define EGL_DEFAULT_DISPLAY               EGL_CAST(EGLNativeDisplayType,0) /* EGLNativeDisplayType is the parameter type of eglGetDisplay (EGL 1.0 section) */
+#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT   0x0200
+#define EGL_MULTISAMPLE_RESOLVE           0x3099
+#define EGL_MULTISAMPLE_RESOLVE_DEFAULT   0x309A
+#define EGL_MULTISAMPLE_RESOLVE_BOX       0x309B
+#define EGL_OPENGL_API                    0x30A2
+#define EGL_OPENGL_BIT                    0x0008
+#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT   0x0400
+typedef EGLContext (EGLAPIENTRYP PFNEGLGETCURRENTCONTEXTPROC) (void);
+#if EGL_EGL_PROTOTYPES /* direct prototype is optional; the function-pointer typedef above is declared either way */
+EGLAPI EGLContext EGLAPIENTRY eglGetCurrentContext (void);
+#endif /* EGL_EGL_PROTOTYPES */
+#endif /* EGL_VERSION_1_4 */
+
+#ifndef EGL_VERSION_1_5 /* section guard: skipped when EGL_VERSION_1_5 is already defined */
+#define EGL_VERSION_1_5 1
+typedef void *EGLSync;
+typedef intptr_t EGLAttrib; /* intptr_t is not declared in this header; presumably reaches here via eglplatform.h or khrplatform.h -- confirm */
+typedef khronos_utime_nanoseconds_t EGLTime; /* khronos_utime_nanoseconds_t is not declared in this header; presumably from KHR/khrplatform.h */
+typedef void *EGLImage;
+#define EGL_CONTEXT_MAJOR_VERSION         0x3098 /* same value as EGL_CONTEXT_CLIENT_VERSION (EGL 1.3 section) */
+#define EGL_CONTEXT_MINOR_VERSION         0x30FB
+#define EGL_CONTEXT_OPENGL_PROFILE_MASK   0x30FD
+#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY 0x31BD
+#define EGL_NO_RESET_NOTIFICATION         0x31BE
+#define EGL_LOSE_CONTEXT_ON_RESET         0x31BF
+#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT 0x00000001
+#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT 0x00000002
+#define EGL_CONTEXT_OPENGL_DEBUG          0x31B0
+#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE 0x31B1
+#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS  0x31B2
+#define EGL_OPENGL_ES3_BIT                0x00000040
+#define EGL_CL_EVENT_HANDLE               0x309C
+#define EGL_SYNC_CL_EVENT                 0x30FE
+#define EGL_SYNC_CL_EVENT_COMPLETE        0x30FF
+#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE  0x30F0
+#define EGL_SYNC_TYPE                     0x30F7
+#define EGL_SYNC_STATUS                   0x30F1
+#define EGL_SYNC_CONDITION                0x30F8
+#define EGL_SIGNALED                      0x30F2
+#define EGL_UNSIGNALED                    0x30F3
+#define EGL_SYNC_FLUSH_COMMANDS_BIT       0x0001
+#define EGL_FOREVER                       0xFFFFFFFFFFFFFFFFull /* all-ones 64-bit constant; presumably meant for the EGLTime timeout of eglClientWaitSync -- confirm */
+#define EGL_TIMEOUT_EXPIRED               0x30F5
+#define EGL_CONDITION_SATISFIED           0x30F6
+#define EGL_NO_SYNC                       EGL_CAST(EGLSync,0)
+#define EGL_SYNC_FENCE                    0x30F9
+#define EGL_GL_COLORSPACE                 0x309D
+#define EGL_GL_COLORSPACE_SRGB            0x3089 /* same value as EGL_COLORSPACE_sRGB (EGL 1.2 section) */
+#define EGL_GL_COLORSPACE_LINEAR          0x308A /* same value as EGL_COLORSPACE_LINEAR (EGL 1.2 section) */
+#define EGL_GL_RENDERBUFFER               0x30B9
+#define EGL_GL_TEXTURE_2D                 0x30B1
+#define EGL_GL_TEXTURE_LEVEL              0x30BC
+#define EGL_GL_TEXTURE_3D                 0x30B2
+#define EGL_GL_TEXTURE_ZOFFSET            0x30BD
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x30B3
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x30B4
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x30B5
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x30B6
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x30B7
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x30B8
+#define EGL_IMAGE_PRESERVED               0x30D2
+#define EGL_NO_IMAGE                      EGL_CAST(EGLImage,0)
+typedef EGLSync (EGLAPIENTRYP PFNEGLCREATESYNCPROC) (EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCPROC) (EGLDisplay dpy, EGLSync sync);
+typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCPROC) (EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBPROC) (EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *value);
+typedef EGLImage (EGLAPIENTRYP PFNEGLCREATEIMAGEPROC) (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLAttrib *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYIMAGEPROC) (EGLDisplay dpy, EGLImage image);
+typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETPLATFORMDISPLAYPROC) (EGLenum platform, void *native_display, const EGLAttrib *attrib_list);
+typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMWINDOWSURFACEPROC) (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLAttrib *attrib_list);
+typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC) (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLAttrib *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLWAITSYNCPROC) (EGLDisplay dpy, EGLSync sync, EGLint flags);
+#if EGL_EGL_PROTOTYPES /* direct prototypes are optional; the function-pointer typedefs above are declared either way */
+EGLAPI EGLSync EGLAPIENTRY eglCreateSync (EGLDisplay dpy, EGLenum type, const EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroySync (EGLDisplay dpy, EGLSync sync);
+EGLAPI EGLint EGLAPIENTRY eglClientWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttrib (EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib *value);
+EGLAPI EGLImage EGLAPIENTRY eglCreateImage (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImage (EGLDisplay dpy, EGLImage image);
+EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplay (EGLenum platform, void *native_display, const EGLAttrib *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurface (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLAttrib *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurface (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglWaitSync (EGLDisplay dpy, EGLSync sync, EGLint flags);
+#endif /* EGL_EGL_PROTOTYPES */
+#endif /* EGL_VERSION_1_5 */
+
+#ifdef __cplusplus
+} /* closes the extern "C" block opened at the top of this header */
+#endif /* __cplusplus */
+
+#endif /* __egl_h_ */
diff --git a/thirdparty/angle/include/EGL/eglext.h b/thirdparty/angle/include/EGL/eglext.h
new file mode 100644
index 0000000000..d226b7f6da
--- /dev/null
+++ b/thirdparty/angle/include/EGL/eglext.h
@@ -0,0 +1,1486 @@
+#ifndef __eglext_h_ /* include guard for the EGL extension header */
+#define __eglext_h_ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/*
+** Copyright 2013-2020 The Khronos Group Inc.
+** SPDX-License-Identifier: Apache-2.0
+**
+** This header is generated from the Khronos EGL XML API Registry.
+** The current version of the Registry, generator scripts
+** used to make the header, and the header can be found at
+**   http://www.khronos.org/registry/egl
+**
+** Khronos $Git commit SHA1: 6fb1daea15 $ on $Git commit date: 2022-05-25 09:41:13 -0600 $
+*/
+
+#include <EGL/eglplatform.h> /* NOTE(review): the sections below use EGLDisplay, EGLenum, EGLAttrib etc. without any egl.h include in view -- presumably the includer pulls in <EGL/egl.h> first; confirm */
+
+#define EGL_EGLEXT_VERSION 20220525 /* same date as the "Git commit date: 2022-05-25" in the banner above */
+
+/* Generated C header for:
+ * API: egl
+ * Versions considered: .*
+ * Versions emitted: _nomatch_^
+ * Default extensions included: egl
+ * Additional extensions included: _nomatch_^
+ * Extensions removed: _nomatch_^
+ */
+
+#ifndef EGL_KHR_cl_event /* section guard; tokens only, no types or entry points */
+#define EGL_KHR_cl_event 1
+#define EGL_CL_EVENT_HANDLE_KHR           0x309C /* same value as EGL_CL_EVENT_HANDLE in egl.h */
+#define EGL_SYNC_CL_EVENT_KHR             0x30FE /* same value as EGL_SYNC_CL_EVENT in egl.h */
+#define EGL_SYNC_CL_EVENT_COMPLETE_KHR    0x30FF /* same value as EGL_SYNC_CL_EVENT_COMPLETE in egl.h */
+#endif /* EGL_KHR_cl_event */
+
+#ifndef EGL_KHR_cl_event2 /* section guard: skipped when EGL_KHR_cl_event2 is already defined */
+#define EGL_KHR_cl_event2 1
+typedef void *EGLSyncKHR; /* also used by the EGL_KHR_fence_sync and EGL_KHR_reusable_sync sections below */
+typedef intptr_t EGLAttribKHR; /* same underlying type as EGLAttrib in egl.h; also used by EGL_KHR_lock_surface3 below */
+typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNC64KHRPROC) (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list);
+#ifdef EGL_EGLEXT_PROTOTYPES /* prototype only when the includer defines this macro; no default is set in the visible part of this header */
+EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSync64KHR (EGLDisplay dpy, EGLenum type, const EGLAttribKHR *attrib_list);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_KHR_cl_event2 */
+
+#ifndef EGL_KHR_client_get_all_proc_addresses
+#define EGL_KHR_client_get_all_proc_addresses 1 /* presence macro only: this section declares no tokens, types or entry points */
+#endif /* EGL_KHR_client_get_all_proc_addresses */
+
+#ifndef EGL_KHR_config_attribs /* section guard; tokens only, no types or entry points */
+#define EGL_KHR_config_attribs 1
+#define EGL_CONFORMANT_KHR                0x3042 /* same value as EGL_CONFORMANT in egl.h */
+#define EGL_VG_COLORSPACE_LINEAR_BIT_KHR  0x0020 /* same value as EGL_VG_COLORSPACE_LINEAR_BIT in egl.h */
+#define EGL_VG_ALPHA_FORMAT_PRE_BIT_KHR   0x0040 /* same value as EGL_VG_ALPHA_FORMAT_PRE_BIT in egl.h */
+#endif /* EGL_KHR_config_attribs */
+
+#ifndef EGL_KHR_context_flush_control /* section guard; tokens only, no types or entry points */
+#define EGL_KHR_context_flush_control 1
+#define EGL_CONTEXT_RELEASE_BEHAVIOR_NONE_KHR 0 /* plain 0, unlike the 0x2097/0x2098 tokens below */
+#define EGL_CONTEXT_RELEASE_BEHAVIOR_KHR  0x2097 /* presumably the attribute name, with NONE and FLUSH as its values -- confirm against the extension spec */
+#define EGL_CONTEXT_RELEASE_BEHAVIOR_FLUSH_KHR 0x2098
+#endif /* EGL_KHR_context_flush_control */
+
+#ifndef EGL_KHR_create_context /* section guard; tokens only, no types or entry points */
+#define EGL_KHR_create_context 1
+#define EGL_CONTEXT_MAJOR_VERSION_KHR     0x3098 /* same value as EGL_CONTEXT_MAJOR_VERSION in egl.h */
+#define EGL_CONTEXT_MINOR_VERSION_KHR     0x30FB /* same value as EGL_CONTEXT_MINOR_VERSION in egl.h */
+#define EGL_CONTEXT_FLAGS_KHR             0x30FC /* no un-suffixed counterpart in egl.h */
+#define EGL_CONTEXT_OPENGL_PROFILE_MASK_KHR 0x30FD /* same value as EGL_CONTEXT_OPENGL_PROFILE_MASK in egl.h */
+#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_KHR 0x31BD /* same value as the un-suffixed name in egl.h */
+#define EGL_NO_RESET_NOTIFICATION_KHR     0x31BE /* same value as EGL_NO_RESET_NOTIFICATION in egl.h */
+#define EGL_LOSE_CONTEXT_ON_RESET_KHR     0x31BF /* same value as EGL_LOSE_CONTEXT_ON_RESET in egl.h */
+#define EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR  0x00000001 /* egl.h has no ..._DEBUG_BIT; it declares EGL_CONTEXT_OPENGL_DEBUG as 0x31B0 */
+#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR 0x00000002 /* egl.h declares EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE as 0x31B1 instead */
+#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR 0x00000004 /* egl.h declares EGL_CONTEXT_OPENGL_ROBUST_ACCESS as 0x31B2 instead */
+#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR 0x00000001 /* same value as EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT in egl.h */
+#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT_KHR 0x00000002 /* same value as the un-suffixed name in egl.h */
+#define EGL_OPENGL_ES3_BIT_KHR            0x00000040 /* same value as EGL_OPENGL_ES3_BIT in egl.h */
+#endif /* EGL_KHR_create_context */
+
+#ifndef EGL_KHR_create_context_no_error /* section guard; one token, no types or entry points */
+#define EGL_KHR_create_context_no_error 1
+#define EGL_CONTEXT_OPENGL_NO_ERROR_KHR   0x31B3 /* next value after 0x31B0-0x31B2, which egl.h gives to EGL_CONTEXT_OPENGL_DEBUG, _FORWARD_COMPATIBLE and _ROBUST_ACCESS */
+#endif /* EGL_KHR_create_context_no_error */
+
+#ifndef EGL_KHR_debug /* section guard: skipped when EGL_KHR_debug is already defined */
+#define EGL_KHR_debug 1
+typedef void *EGLLabelKHR;
+typedef void *EGLObjectKHR;
+typedef void (EGLAPIENTRY  *EGLDEBUGPROCKHR)(EGLenum error,const char *command,EGLint messageType,EGLLabelKHR threadLabel,EGLLabelKHR objectLabel,const char* message); /* callback type taken by eglDebugMessageControlKHR below */
+#define EGL_OBJECT_THREAD_KHR             0x33B0 /* presumably the objectType values for eglLabelObjectKHR start here -- confirm against the extension spec */
+#define EGL_OBJECT_DISPLAY_KHR            0x33B1
+#define EGL_OBJECT_CONTEXT_KHR            0x33B2
+#define EGL_OBJECT_SURFACE_KHR            0x33B3
+#define EGL_OBJECT_IMAGE_KHR              0x33B4
+#define EGL_OBJECT_SYNC_KHR               0x33B5
+#define EGL_OBJECT_STREAM_KHR             0x33B6
+#define EGL_DEBUG_MSG_CRITICAL_KHR        0x33B9 /* presumably the messageType values passed to the callback start here -- confirm */
+#define EGL_DEBUG_MSG_ERROR_KHR           0x33BA
+#define EGL_DEBUG_MSG_WARN_KHR            0x33BB
+#define EGL_DEBUG_MSG_INFO_KHR            0x33BC
+#define EGL_DEBUG_CALLBACK_KHR            0x33B8
+typedef EGLint (EGLAPIENTRYP PFNEGLDEBUGMESSAGECONTROLKHRPROC) (EGLDEBUGPROCKHR callback, const EGLAttrib *attrib_list); /* EGLAttrib is typedef'd in egl.h (EGL 1.5 section), not in this header */
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEBUGKHRPROC) (EGLint attribute, EGLAttrib *value);
+typedef EGLint (EGLAPIENTRYP PFNEGLLABELOBJECTKHRPROC) (EGLDisplay display, EGLenum objectType, EGLObjectKHR object, EGLLabelKHR label);
+#ifdef EGL_EGLEXT_PROTOTYPES /* prototypes only when the includer defines this macro */
+EGLAPI EGLint EGLAPIENTRY eglDebugMessageControlKHR (EGLDEBUGPROCKHR callback, const EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDebugKHR (EGLint attribute, EGLAttrib *value);
+EGLAPI EGLint EGLAPIENTRY eglLabelObjectKHR (EGLDisplay display, EGLenum objectType, EGLObjectKHR object, EGLLabelKHR label);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_KHR_debug */
+
+#ifndef EGL_KHR_display_reference /* section guard: skipped when EGL_KHR_display_reference is already defined */
+#define EGL_KHR_display_reference 1
+#define EGL_TRACK_REFERENCES_KHR          0x3352
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDISPLAYATTRIBKHRPROC) (EGLDisplay dpy, EGLint name, EGLAttrib *value); /* EGLAttrib is typedef'd in egl.h (EGL 1.5 section), not in this header */
+#ifdef EGL_EGLEXT_PROTOTYPES /* prototype only when the includer defines this macro */
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDisplayAttribKHR (EGLDisplay dpy, EGLint name, EGLAttrib *value);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_KHR_display_reference */
+
+#ifndef EGL_KHR_fence_sync /* section guard: skipped when EGL_KHR_fence_sync is already defined */
+#define EGL_KHR_fence_sync 1
+typedef khronos_utime_nanoseconds_t EGLTimeKHR; /* same underlying type as EGLTime in egl.h; declared even when KHRONOS_SUPPORT_INT64 is undefined */
+#ifdef KHRONOS_SUPPORT_INT64 /* everything up to the matching #endif is dropped when this macro is undefined */
+#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_KHR 0x30F0 /* same value as EGL_SYNC_PRIOR_COMMANDS_COMPLETE in egl.h */
+#define EGL_SYNC_CONDITION_KHR            0x30F8 /* same value as EGL_SYNC_CONDITION in egl.h */
+#define EGL_SYNC_FENCE_KHR                0x30F9 /* same value as EGL_SYNC_FENCE in egl.h */
+typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESYNCKHRPROC) (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list); /* EGLint list here; eglCreateSync in egl.h takes const EGLAttrib * */
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync);
+typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value); /* EGLint *value here; eglGetSyncAttrib in egl.h takes EGLAttrib * */
+#ifdef EGL_EGLEXT_PROTOTYPES /* prototypes only when the includer defines this macro */
+EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateSyncKHR (EGLDisplay dpy, EGLenum type, const EGLint *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncKHR (EGLDisplay dpy, EGLSyncKHR sync);
+EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags, EGLTimeKHR timeout);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint attribute, EGLint *value);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* KHRONOS_SUPPORT_INT64 */
+#endif /* EGL_KHR_fence_sync */
+
+#ifndef EGL_KHR_get_all_proc_addresses
+#define EGL_KHR_get_all_proc_addresses 1 /* presence macro only: this section declares no tokens, types or entry points */
+#endif /* EGL_KHR_get_all_proc_addresses */
+
+#ifndef EGL_KHR_gl_colorspace /* section guard; tokens only, no types or entry points */
+#define EGL_KHR_gl_colorspace 1
+#define EGL_GL_COLORSPACE_KHR             0x309D /* same value as EGL_GL_COLORSPACE in egl.h */
+#define EGL_GL_COLORSPACE_SRGB_KHR        0x3089 /* same value as EGL_GL_COLORSPACE_SRGB in egl.h */
+#define EGL_GL_COLORSPACE_LINEAR_KHR      0x308A /* same value as EGL_GL_COLORSPACE_LINEAR in egl.h */
+#endif /* EGL_KHR_gl_colorspace */
+
+#ifndef EGL_KHR_gl_renderbuffer_image /* section guard; one token, no types or entry points */
+#define EGL_KHR_gl_renderbuffer_image 1
+#define EGL_GL_RENDERBUFFER_KHR           0x30B9 /* same value as EGL_GL_RENDERBUFFER in egl.h */
+#endif /* EGL_KHR_gl_renderbuffer_image */
+
+#ifndef EGL_KHR_gl_texture_2D_image /* section guard; tokens only, no types or entry points */
+#define EGL_KHR_gl_texture_2D_image 1
+#define EGL_GL_TEXTURE_2D_KHR             0x30B1 /* same value as EGL_GL_TEXTURE_2D in egl.h */
+#define EGL_GL_TEXTURE_LEVEL_KHR          0x30BC /* same value as EGL_GL_TEXTURE_LEVEL in egl.h */
+#endif /* EGL_KHR_gl_texture_2D_image */
+
+#ifndef EGL_KHR_gl_texture_3D_image /* section guard; tokens only, no types or entry points */
+#define EGL_KHR_gl_texture_3D_image 1
+#define EGL_GL_TEXTURE_3D_KHR             0x30B2 /* same value as EGL_GL_TEXTURE_3D in egl.h */
+#define EGL_GL_TEXTURE_ZOFFSET_KHR        0x30BD /* same value as EGL_GL_TEXTURE_ZOFFSET in egl.h */
+#endif /* EGL_KHR_gl_texture_3D_image */
+
+#ifndef EGL_KHR_gl_texture_cubemap_image /* section guard; tokens only, no types or entry points */
+#define EGL_KHR_gl_texture_cubemap_image 1 /* the six tokens below carry the same values as the un-suffixed EGL_GL_TEXTURE_CUBE_MAP_... names in egl.h */
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X_KHR 0x30B3
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X_KHR 0x30B4
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y_KHR 0x30B5
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y_KHR 0x30B6
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z_KHR 0x30B7
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z_KHR 0x30B8
+#endif /* EGL_KHR_gl_texture_cubemap_image */
+
+#ifndef EGL_KHR_image /* section guard: skipped when EGL_KHR_image is already defined */
+#define EGL_KHR_image 1
+typedef void *EGLImageKHR; /* same underlying type as EGLImage in egl.h */
+#define EGL_NATIVE_PIXMAP_KHR             0x30B0 /* no un-suffixed counterpart in egl.h */
+#define EGL_NO_IMAGE_KHR                  EGL_CAST(EGLImageKHR,0)
+typedef EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEIMAGEKHRPROC) (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list); /* EGLint list here; eglCreateImage in egl.h takes const EGLAttrib * */
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYIMAGEKHRPROC) (EGLDisplay dpy, EGLImageKHR image);
+#ifdef EGL_EGLEXT_PROTOTYPES /* prototypes only when the includer defines this macro */
+EGLAPI EGLImageKHR EGLAPIENTRY eglCreateImageKHR (EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLint *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroyImageKHR (EGLDisplay dpy, EGLImageKHR image);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_KHR_image */
+
+#ifndef EGL_KHR_image_base /* section guard; one token, no types or entry points */
+#define EGL_KHR_image_base 1
+#define EGL_IMAGE_PRESERVED_KHR           0x30D2 /* same value as EGL_IMAGE_PRESERVED in egl.h */
+#endif /* EGL_KHR_image_base */
+
+#ifndef EGL_KHR_image_pixmap
+#define EGL_KHR_image_pixmap 1 /* presence macro only: this section declares no tokens, types or entry points */
+#endif /* EGL_KHR_image_pixmap */
+
+#ifndef EGL_KHR_lock_surface /* section guard: skipped when EGL_KHR_lock_surface is already defined */
+#define EGL_KHR_lock_surface 1
+#define EGL_READ_SURFACE_BIT_KHR          0x0001
+#define EGL_WRITE_SURFACE_BIT_KHR         0x0002
+#define EGL_LOCK_SURFACE_BIT_KHR          0x0080
+#define EGL_OPTIMAL_FORMAT_BIT_KHR        0x0100
+#define EGL_MATCH_FORMAT_KHR              0x3043
+#define EGL_FORMAT_RGB_565_EXACT_KHR      0x30C0
+#define EGL_FORMAT_RGB_565_KHR            0x30C1
+#define EGL_FORMAT_RGBA_8888_EXACT_KHR    0x30C2
+#define EGL_FORMAT_RGBA_8888_KHR          0x30C3
+#define EGL_MAP_PRESERVE_PIXELS_KHR       0x30C4
+#define EGL_LOCK_USAGE_HINT_KHR           0x30C5
+#define EGL_BITMAP_POINTER_KHR            0x30C6
+#define EGL_BITMAP_PITCH_KHR              0x30C7
+#define EGL_BITMAP_ORIGIN_KHR             0x30C8
+#define EGL_BITMAP_PIXEL_RED_OFFSET_KHR   0x30C9
+#define EGL_BITMAP_PIXEL_GREEN_OFFSET_KHR 0x30CA
+#define EGL_BITMAP_PIXEL_BLUE_OFFSET_KHR  0x30CB
+#define EGL_BITMAP_PIXEL_ALPHA_OFFSET_KHR 0x30CC
+#define EGL_BITMAP_PIXEL_LUMINANCE_OFFSET_KHR 0x30CD
+#define EGL_LOWER_LEFT_KHR                0x30CE
+#define EGL_UPPER_LEFT_KHR                0x30CF
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLUNLOCKSURFACEKHRPROC) (EGLDisplay dpy, EGLSurface surface);
+#ifdef EGL_EGLEXT_PROTOTYPES /* prototypes only when the includer defines this macro */
+EGLAPI EGLBoolean EGLAPIENTRY eglLockSurfaceKHR (EGLDisplay dpy, EGLSurface surface, const EGLint *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglUnlockSurfaceKHR (EGLDisplay dpy, EGLSurface surface);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_KHR_lock_surface */
+
+#ifndef EGL_KHR_lock_surface2 /* section guard; one token, no types or entry points */
+#define EGL_KHR_lock_surface2 1
+#define EGL_BITMAP_PIXEL_SIZE_KHR         0x3110 /* presumably queried like the EGL_BITMAP_..._KHR tokens of EGL_KHR_lock_surface -- confirm against the extension spec */
+#endif /* EGL_KHR_lock_surface2 */
+
+#ifndef EGL_KHR_lock_surface3 /* section guard: skipped when EGL_KHR_lock_surface3 is already defined */
+#define EGL_KHR_lock_surface3 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACE64KHRPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value); /* EGLAttribKHR is typedef'd in the EGL_KHR_cl_event2 section above; eglQuerySurface in egl.h uses EGLint *value */
+#ifdef EGL_EGLEXT_PROTOTYPES /* prototype only when the includer defines this macro */
+EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurface64KHR (EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLAttribKHR *value);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_KHR_lock_surface3 */
+
+#ifndef EGL_KHR_mutable_render_buffer /* section guard; one token, no types or entry points */
+#define EGL_KHR_mutable_render_buffer 1
+#define EGL_MUTABLE_RENDER_BUFFER_BIT_KHR 0x1000 /* presumably a bit for the EGL_SURFACE_TYPE mask -- confirm against the extension spec */
+#endif /* EGL_KHR_mutable_render_buffer */
+
+#ifndef EGL_KHR_no_config_context /* section guard; one token, no types or entry points */
+#define EGL_KHR_no_config_context 1
+#define EGL_NO_CONFIG_KHR                 EGL_CAST(EGLConfig,0) /* EGLConfig is void * in egl.h; EGL_CAST is defined outside this header (presumably eglplatform.h) */
+#endif /* EGL_KHR_no_config_context */
+
+#ifndef EGL_KHR_partial_update /* section guard: skipped when EGL_KHR_partial_update is already defined */
+#define EGL_KHR_partial_update 1
+#define EGL_BUFFER_AGE_KHR                0x313D
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSETDAMAGEREGIONKHRPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects); /* rects is a non-const pointer; whether n_rects counts rectangles or EGLints is not shown here -- confirm against the extension spec */
+#ifdef EGL_EGLEXT_PROTOTYPES /* prototype only when the includer defines this macro */
+EGLAPI EGLBoolean EGLAPIENTRY eglSetDamageRegionKHR (EGLDisplay dpy, EGLSurface surface, EGLint *rects, EGLint n_rects);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_KHR_partial_update */
+
+#ifndef EGL_KHR_platform_android /* section guard; one token, no types or entry points */
+#define EGL_KHR_platform_android 1
+#define EGL_PLATFORM_ANDROID_KHR          0x3141 /* presumably a value for the EGLenum platform argument of eglGetPlatformDisplay (egl.h) -- confirm against the extension spec */
+#endif /* EGL_KHR_platform_android */
+
+#ifndef EGL_KHR_platform_gbm /* section guard; one token, no types or entry points */
+#define EGL_KHR_platform_gbm 1
+#define EGL_PLATFORM_GBM_KHR              0x31D7 /* presumably a value for the EGLenum platform argument of eglGetPlatformDisplay (egl.h) -- confirm against the extension spec */
+#endif /* EGL_KHR_platform_gbm */
+
+#ifndef EGL_KHR_platform_wayland
+#define EGL_KHR_platform_wayland 1
+#define EGL_PLATFORM_WAYLAND_KHR          0x31D8
+#endif /* EGL_KHR_platform_wayland */
+
+#ifndef EGL_KHR_platform_x11
+#define EGL_KHR_platform_x11 1
+#define EGL_PLATFORM_X11_KHR              0x31D5
+#define EGL_PLATFORM_X11_SCREEN_KHR       0x31D6
+#endif /* EGL_KHR_platform_x11 */
+
+#ifndef EGL_KHR_reusable_sync
+#define EGL_KHR_reusable_sync 1
+#ifdef KHRONOS_SUPPORT_INT64 /* Every token and entry point of this section is skipped when KHRONOS_SUPPORT_INT64 is undefined. */
+#define EGL_SYNC_STATUS_KHR               0x30F1
+#define EGL_SIGNALED_KHR                  0x30F2
+#define EGL_UNSIGNALED_KHR                0x30F3
+#define EGL_TIMEOUT_EXPIRED_KHR           0x30F5
+#define EGL_CONDITION_SATISFIED_KHR       0x30F6
+#define EGL_SYNC_TYPE_KHR                 0x30F7
+#define EGL_SYNC_REUSABLE_KHR             0x30FA
+#define EGL_SYNC_FLUSH_COMMANDS_BIT_KHR   0x0001
+#define EGL_FOREVER_KHR                   0xFFFFFFFFFFFFFFFFull /* All 64 bits set, written as an unsigned long long literal. */
+#define EGL_NO_SYNC_KHR                   EGL_CAST(EGLSyncKHR,0)
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode); /* EGLSyncKHR is not declared in this section; it must already be in scope. */
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLenum mode);
+#endif
+#endif /* KHRONOS_SUPPORT_INT64 */
+#endif /* EGL_KHR_reusable_sync */
+
+#ifndef EGL_KHR_stream
+#define EGL_KHR_stream 1 /* Later stream sections test this macro with #ifdef before using EGLStreamKHR. */
+typedef void *EGLStreamKHR; /* Declared outside the KHRONOS_SUPPORT_INT64 guard below, so it exists either way. */
+typedef khronos_uint64_t EGLuint64KHR; /* Also used by later sections, e.g. EGL_ANDROID_get_frame_timestamps and EGL_EXT_image_dma_buf_import_modifiers. */
+#ifdef KHRONOS_SUPPORT_INT64
+#define EGL_NO_STREAM_KHR                 EGL_CAST(EGLStreamKHR,0)
+#define EGL_CONSUMER_LATENCY_USEC_KHR     0x3210
+#define EGL_PRODUCER_FRAME_KHR            0x3212
+#define EGL_CONSUMER_FRAME_KHR            0x3213
+#define EGL_STREAM_STATE_KHR              0x3214
+#define EGL_STREAM_STATE_CREATED_KHR      0x3215
+#define EGL_STREAM_STATE_CONNECTING_KHR   0x3216
+#define EGL_STREAM_STATE_EMPTY_KHR        0x3217
+#define EGL_STREAM_STATE_NEW_FRAME_AVAILABLE_KHR 0x3218
+#define EGL_STREAM_STATE_OLD_FRAME_AVAILABLE_KHR 0x3219
+#define EGL_STREAM_STATE_DISCONNECTED_KHR 0x321A
+#define EGL_BAD_STREAM_KHR                0x321B
+#define EGL_BAD_STATE_KHR                 0x321C
+typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMKHRPROC) (EGLDisplay dpy, const EGLint *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMATTRIBKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMU64KHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value);
+#ifdef EGL_EGLEXT_PROTOTYPES /* Direct prototypes are declared only when EGL_EGLEXT_PROTOTYPES is defined. */
+EGLAPI EGLStreamKHR EGLAPIENTRY eglCreateStreamKHR (EGLDisplay dpy, const EGLint *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroyStreamKHR (EGLDisplay dpy, EGLStreamKHR stream);
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamAttribKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint value);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLint *value);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamu64KHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLuint64KHR *value);
+#endif
+#endif /* KHRONOS_SUPPORT_INT64 */
+#endif /* EGL_KHR_stream */
+
+#ifndef EGL_KHR_stream_attrib
+#define EGL_KHR_stream_attrib 1
+#ifdef KHRONOS_SUPPORT_INT64 /* Gated on KHRONOS_SUPPORT_INT64 only; unlike the sibling stream sections there is no #ifdef EGL_KHR_stream check here. */
+typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMATTRIBKHRPROC) (EGLDisplay dpy, const EGLAttrib *attrib_list); /* EGLStreamKHR is typedef'd in the EGL_KHR_stream section. */
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSETSTREAMATTRIBKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLAttrib value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMATTRIBKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLAttrib *value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERACQUIREATTRIBKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERRELEASEATTRIBKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLStreamKHR EGLAPIENTRY eglCreateStreamAttribKHR (EGLDisplay dpy, const EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglSetStreamAttribKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLAttrib value);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamAttribKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLAttrib *value);
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerAcquireAttribKHR (EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerReleaseAttribKHR (EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
+#endif
+#endif /* KHRONOS_SUPPORT_INT64 */
+#endif /* EGL_KHR_stream_attrib */
+
+#ifndef EGL_KHR_stream_consumer_gltexture
+#define EGL_KHR_stream_consumer_gltexture 1
+#ifdef EGL_KHR_stream /* The body uses EGLStreamKHR, which is typedef'd in the EGL_KHR_stream section. */
+#define EGL_CONSUMER_ACQUIRE_TIMEOUT_USEC_KHR 0x321E
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERACQUIREKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERRELEASEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerGLTextureExternalKHR (EGLDisplay dpy, EGLStreamKHR stream);
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerAcquireKHR (EGLDisplay dpy, EGLStreamKHR stream);
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerReleaseKHR (EGLDisplay dpy, EGLStreamKHR stream);
+#endif
+#endif /* EGL_KHR_stream */
+#endif /* EGL_KHR_stream_consumer_gltexture */
+
+#ifndef EGL_KHR_stream_cross_process_fd
+#define EGL_KHR_stream_cross_process_fd 1
+typedef int EGLNativeFileDescriptorKHR; /* Declared before the EGL_KHR_stream check, so the type exists regardless of that check. */
+#ifdef EGL_KHR_stream
+#define EGL_NO_FILE_DESCRIPTOR_KHR        EGL_CAST(EGLNativeFileDescriptorKHR,-1) /* -1 converted to the descriptor type through EGL_CAST. */
+typedef EGLNativeFileDescriptorKHR (EGLAPIENTRYP PFNEGLGETSTREAMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream);
+typedef EGLStreamKHR (EGLAPIENTRYP PFNEGLCREATESTREAMFROMFILEDESCRIPTORKHRPROC) (EGLDisplay dpy, EGLNativeFileDescriptorKHR file_descriptor);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLNativeFileDescriptorKHR EGLAPIENTRY eglGetStreamFileDescriptorKHR (EGLDisplay dpy, EGLStreamKHR stream);
+EGLAPI EGLStreamKHR EGLAPIENTRY eglCreateStreamFromFileDescriptorKHR (EGLDisplay dpy, EGLNativeFileDescriptorKHR file_descriptor);
+#endif
+#endif /* EGL_KHR_stream */
+#endif /* EGL_KHR_stream_cross_process_fd */
+
+#ifndef EGL_KHR_stream_fifo
+#define EGL_KHR_stream_fifo 1
+#ifdef EGL_KHR_stream
+#define EGL_STREAM_FIFO_LENGTH_KHR        0x31FC
+#define EGL_STREAM_TIME_NOW_KHR           0x31FD
+#define EGL_STREAM_TIME_CONSUMER_KHR      0x31FE
+#define EGL_STREAM_TIME_PRODUCER_KHR      0x31FF
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMTIMEKHRPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value); /* EGLTimeKHR is not declared in this section; it must already be in scope. */
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamTimeKHR (EGLDisplay dpy, EGLStreamKHR stream, EGLenum attribute, EGLTimeKHR *value);
+#endif
+#endif /* EGL_KHR_stream */
+#endif /* EGL_KHR_stream_fifo */
+
+#ifndef EGL_KHR_stream_producer_aldatalocator
+#define EGL_KHR_stream_producer_aldatalocator 1 /* Marker only: the guarded body below is empty. */
+#ifdef EGL_KHR_stream
+#endif /* EGL_KHR_stream */
+#endif /* EGL_KHR_stream_producer_aldatalocator */
+
+#ifndef EGL_KHR_stream_producer_eglsurface
+#define EGL_KHR_stream_producer_eglsurface 1
+#ifdef EGL_KHR_stream /* The body uses EGLStreamKHR, which is typedef'd in the EGL_KHR_stream section. */
+#define EGL_STREAM_BIT_KHR                0x0800
+typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATESTREAMPRODUCERSURFACEKHRPROC) (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLSurface EGLAPIENTRY eglCreateStreamProducerSurfaceKHR (EGLDisplay dpy, EGLConfig config, EGLStreamKHR stream, const EGLint *attrib_list);
+#endif
+#endif /* EGL_KHR_stream */
+#endif /* EGL_KHR_stream_producer_eglsurface */
+
+#ifndef EGL_KHR_surfaceless_context
+#define EGL_KHR_surfaceless_context 1 /* Marker only: this section declares no tokens, types or entry points. */
+#endif /* EGL_KHR_surfaceless_context */
+
+#ifndef EGL_KHR_swap_buffers_with_damage
+#define EGL_KHR_swap_buffers_with_damage 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHDAMAGEKHRPROC) (EGLDisplay dpy, EGLSurface surface, const EGLint *rects, EGLint n_rects); /* rects is const here, unlike eglSetDamageRegionKHR. */
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageKHR (EGLDisplay dpy, EGLSurface surface, const EGLint *rects, EGLint n_rects);
+#endif
+#endif /* EGL_KHR_swap_buffers_with_damage */
+
+#ifndef EGL_KHR_vg_parent_image
+#define EGL_KHR_vg_parent_image 1
+#define EGL_VG_PARENT_IMAGE_KHR           0x30BA
+#endif /* EGL_KHR_vg_parent_image */
+
+#ifndef EGL_KHR_wait_sync
+#define EGL_KHR_wait_sync 1
+typedef EGLint (EGLAPIENTRYP PFNEGLWAITSYNCKHRPROC) (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags); /* Returns EGLint rather than EGLBoolean; EGLSyncKHR is not declared in this section. */
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLint EGLAPIENTRY eglWaitSyncKHR (EGLDisplay dpy, EGLSyncKHR sync, EGLint flags);
+#endif
+#endif /* EGL_KHR_wait_sync */
+
+#ifndef EGL_ANDROID_GLES_layers
+#define EGL_ANDROID_GLES_layers 1 /* Marker only: this section declares no tokens, types or entry points. */
+#endif /* EGL_ANDROID_GLES_layers */
+
+#ifndef EGL_ANDROID_blob_cache
+#define EGL_ANDROID_blob_cache 1
+typedef khronos_ssize_t EGLsizeiANDROID;
+typedef void (*EGLSetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, const void *value, EGLsizeiANDROID valueSize); /* Callback type: declared with a plain (*) rather than EGLAPIENTRYP. */
+typedef EGLsizeiANDROID (*EGLGetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, void *value, EGLsizeiANDROID valueSize); /* Callback type: value is non-const here, unlike the set callback. */
+typedef void (EGLAPIENTRYP PFNEGLSETBLOBCACHEFUNCSANDROIDPROC) (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI void EGLAPIENTRY eglSetBlobCacheFuncsANDROID (EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get);
+#endif
+#endif /* EGL_ANDROID_blob_cache */
+
+#ifndef EGL_ANDROID_create_native_client_buffer
+#define EGL_ANDROID_create_native_client_buffer 1
+#define EGL_NATIVE_BUFFER_USAGE_ANDROID   0x3143
+#define EGL_NATIVE_BUFFER_USAGE_PROTECTED_BIT_ANDROID 0x00000001 /* The three *_BIT_ANDROID values are distinct single bits (1, 2, 4). */
+#define EGL_NATIVE_BUFFER_USAGE_RENDERBUFFER_BIT_ANDROID 0x00000002
+#define EGL_NATIVE_BUFFER_USAGE_TEXTURE_BIT_ANDROID 0x00000004
+typedef EGLClientBuffer (EGLAPIENTRYP PFNEGLCREATENATIVECLIENTBUFFERANDROIDPROC) (const EGLint *attrib_list); /* Takes no EGLDisplay argument. */
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLClientBuffer EGLAPIENTRY eglCreateNativeClientBufferANDROID (const EGLint *attrib_list);
+#endif
+#endif /* EGL_ANDROID_create_native_client_buffer */
+
+#ifndef EGL_ANDROID_framebuffer_target
+#define EGL_ANDROID_framebuffer_target 1
+#define EGL_FRAMEBUFFER_TARGET_ANDROID    0x3147
+#endif /* EGL_ANDROID_framebuffer_target */
+
+#ifndef EGL_ANDROID_front_buffer_auto_refresh
+#define EGL_ANDROID_front_buffer_auto_refresh 1
+#define EGL_FRONT_BUFFER_AUTO_REFRESH_ANDROID 0x314C
+#endif /* EGL_ANDROID_front_buffer_auto_refresh */
+
+#ifndef EGL_ANDROID_get_frame_timestamps
+#define EGL_ANDROID_get_frame_timestamps 1
+typedef khronos_stime_nanoseconds_t EGLnsecsANDROID; /* Also used by the EGL_ANDROID_presentation_time section, which does not declare it itself. */
+#define EGL_TIMESTAMP_PENDING_ANDROID     EGL_CAST(EGLnsecsANDROID,-2) /* Negative sentinel (-2) in the EGLnsecsANDROID type. */
+#define EGL_TIMESTAMP_INVALID_ANDROID     EGL_CAST(EGLnsecsANDROID,-1) /* Negative sentinel (-1) in the EGLnsecsANDROID type. */
+#define EGL_TIMESTAMPS_ANDROID            0x3430
+#define EGL_COMPOSITE_DEADLINE_ANDROID    0x3431
+#define EGL_COMPOSITE_INTERVAL_ANDROID    0x3432
+#define EGL_COMPOSITE_TO_PRESENT_LATENCY_ANDROID 0x3433
+#define EGL_REQUESTED_PRESENT_TIME_ANDROID 0x3434
+#define EGL_RENDERING_COMPLETE_TIME_ANDROID 0x3435
+#define EGL_COMPOSITION_LATCH_TIME_ANDROID 0x3436
+#define EGL_FIRST_COMPOSITION_START_TIME_ANDROID 0x3437
+#define EGL_LAST_COMPOSITION_START_TIME_ANDROID 0x3438
+#define EGL_FIRST_COMPOSITION_GPU_FINISHED_TIME_ANDROID 0x3439
+#define EGL_DISPLAY_PRESENT_TIME_ANDROID  0x343A
+#define EGL_DEQUEUE_READY_TIME_ANDROID    0x343B
+#define EGL_READS_DONE_TIME_ANDROID       0x343C
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETCOMPOSITORTIMINGSUPPORTEDANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLint name);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETCOMPOSITORTIMINGANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numTimestamps,  const EGLint *names, EGLnsecsANDROID *values);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETNEXTFRAMEIDANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLuint64KHR *frameId); /* EGLuint64KHR is typedef'd in the EGL_KHR_stream section. */
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETFRAMETIMESTAMPSUPPORTEDANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLint timestamp);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETFRAMETIMESTAMPSANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLuint64KHR frameId, EGLint numTimestamps,  const EGLint *timestamps, EGLnsecsANDROID *values);
+#ifdef EGL_EGLEXT_PROTOTYPES /* Direct prototypes are declared only when EGL_EGLEXT_PROTOTYPES is defined. */
+EGLAPI EGLBoolean EGLAPIENTRY eglGetCompositorTimingSupportedANDROID (EGLDisplay dpy, EGLSurface surface, EGLint name);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetCompositorTimingANDROID (EGLDisplay dpy, EGLSurface surface, EGLint numTimestamps,  const EGLint *names, EGLnsecsANDROID *values);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetNextFrameIdANDROID (EGLDisplay dpy, EGLSurface surface, EGLuint64KHR *frameId);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetFrameTimestampSupportedANDROID (EGLDisplay dpy, EGLSurface surface, EGLint timestamp);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetFrameTimestampsANDROID (EGLDisplay dpy, EGLSurface surface, EGLuint64KHR frameId, EGLint numTimestamps,  const EGLint *timestamps, EGLnsecsANDROID *values);
+#endif
+#endif /* EGL_ANDROID_get_frame_timestamps */
+
+#ifndef EGL_ANDROID_get_native_client_buffer
+#define EGL_ANDROID_get_native_client_buffer 1
+struct AHardwareBuffer; /* Forward declaration: this section only uses pointers to the struct. */
+typedef EGLClientBuffer (EGLAPIENTRYP PFNEGLGETNATIVECLIENTBUFFERANDROIDPROC) (const struct AHardwareBuffer *buffer);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLClientBuffer EGLAPIENTRY eglGetNativeClientBufferANDROID (const struct AHardwareBuffer *buffer);
+#endif
+#endif /* EGL_ANDROID_get_native_client_buffer */
+
+#ifndef EGL_ANDROID_image_native_buffer
+#define EGL_ANDROID_image_native_buffer 1
+#define EGL_NATIVE_BUFFER_ANDROID         0x3140
+#endif /* EGL_ANDROID_image_native_buffer */
+
+#ifndef EGL_ANDROID_native_fence_sync
+#define EGL_ANDROID_native_fence_sync 1
+#define EGL_SYNC_NATIVE_FENCE_ANDROID     0x3144
+#define EGL_SYNC_NATIVE_FENCE_FD_ANDROID  0x3145
+#define EGL_SYNC_NATIVE_FENCE_SIGNALED_ANDROID 0x3146
+#define EGL_NO_NATIVE_FENCE_FD_ANDROID    -1 /* NOTE(review): expands to a bare, unparenthesized -1. */
+typedef EGLint (EGLAPIENTRYP PFNEGLDUPNATIVEFENCEFDANDROIDPROC) (EGLDisplay dpy, EGLSyncKHR sync); /* Returns EGLint; EGLSyncKHR is not declared in this section. */
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLint EGLAPIENTRY eglDupNativeFenceFDANDROID (EGLDisplay dpy, EGLSyncKHR sync);
+#endif
+#endif /* EGL_ANDROID_native_fence_sync */
+
+#ifndef EGL_ANDROID_presentation_time
+#define EGL_ANDROID_presentation_time 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLPRESENTATIONTIMEANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLnsecsANDROID time); /* EGLnsecsANDROID is typedef'd in the EGL_ANDROID_get_frame_timestamps section, not here. */
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglPresentationTimeANDROID (EGLDisplay dpy, EGLSurface surface, EGLnsecsANDROID time);
+#endif
+#endif /* EGL_ANDROID_presentation_time */
+
+#ifndef EGL_ANDROID_recordable
+#define EGL_ANDROID_recordable 1
+#define EGL_RECORDABLE_ANDROID            0x3142
+#endif /* EGL_ANDROID_recordable */
+
+#ifndef EGL_ANGLE_d3d_share_handle_client_buffer
+#define EGL_ANGLE_d3d_share_handle_client_buffer 1
+#define EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE 0x3200 /* The only token in this run that mentions the share handle; the surface_d3d_texture_2d_share_handle section below defines none. */
+#endif /* EGL_ANGLE_d3d_share_handle_client_buffer */
+
+#ifndef EGL_ANGLE_device_d3d
+#define EGL_ANGLE_device_d3d 1
+#define EGL_D3D9_DEVICE_ANGLE             0x33A0
+#define EGL_D3D11_DEVICE_ANGLE            0x33A1
+#endif /* EGL_ANGLE_device_d3d */
+
+#ifndef EGL_ANGLE_query_surface_pointer
+#define EGL_ANGLE_query_surface_pointer 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSURFACEPOINTERANGLEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value);
+#ifdef EGL_EGLEXT_PROTOTYPES /* The direct prototype is declared only when EGL_EGLEXT_PROTOTYPES is defined. */
+EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSurface surface, EGLint attribute, void **value);
+#endif
+#endif /* EGL_ANGLE_query_surface_pointer */
+
+#ifndef EGL_ANGLE_surface_d3d_texture_2d_share_handle
+#define EGL_ANGLE_surface_d3d_texture_2d_share_handle 1 /* Marker only: this section declares no tokens, types or entry points. */
+#endif /* EGL_ANGLE_surface_d3d_texture_2d_share_handle */
+
+#ifndef EGL_ANGLE_sync_control_rate
+#define EGL_ANGLE_sync_control_rate 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETMSCRATEANGLEPROC) (EGLDisplay dpy, EGLSurface surface, EGLint *numerator, EGLint *denominator);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglGetMscRateANGLE (EGLDisplay dpy, EGLSurface surface, EGLint *numerator, EGLint *denominator);
+#endif
+#endif /* EGL_ANGLE_sync_control_rate */
+
+#ifndef EGL_ANGLE_window_fixed_size
+#define EGL_ANGLE_window_fixed_size 1
+#define EGL_FIXED_SIZE_ANGLE              0x3201
+#endif /* EGL_ANGLE_window_fixed_size */
+
+#ifndef EGL_ARM_image_format
+#define EGL_ARM_image_format 1
+#define EGL_COLOR_COMPONENT_TYPE_UNSIGNED_INTEGER_ARM 0x3287
+#define EGL_COLOR_COMPONENT_TYPE_INTEGER_ARM 0x3288
+#endif /* EGL_ARM_image_format */
+
+#ifndef EGL_ARM_implicit_external_sync
+#define EGL_ARM_implicit_external_sync 1
+#define EGL_SYNC_PRIOR_COMMANDS_IMPLICIT_EXTERNAL_ARM 0x328A
+#endif /* EGL_ARM_implicit_external_sync */
+
+#ifndef EGL_ARM_pixmap_multisample_discard
+#define EGL_ARM_pixmap_multisample_discard 1
+#define EGL_DISCARD_SAMPLES_ARM           0x3286
+#endif /* EGL_ARM_pixmap_multisample_discard */
+
+#ifndef EGL_EXT_bind_to_front
+#define EGL_EXT_bind_to_front 1
+#define EGL_FRONT_BUFFER_EXT              0x3464
+#endif /* EGL_EXT_bind_to_front */
+
+#ifndef EGL_EXT_buffer_age
+#define EGL_EXT_buffer_age 1
+#define EGL_BUFFER_AGE_EXT                0x313D /* Same numeric value as EGL_BUFFER_AGE_KHR. */
+#endif /* EGL_EXT_buffer_age */
+
+#ifndef EGL_EXT_client_extensions
+#define EGL_EXT_client_extensions 1 /* Marker only: this section declares no tokens, types or entry points. */
+#endif /* EGL_EXT_client_extensions */
+
+#ifndef EGL_EXT_client_sync
+#define EGL_EXT_client_sync 1
+#define EGL_SYNC_CLIENT_EXT               0x3364
+#define EGL_SYNC_CLIENT_SIGNAL_EXT        0x3365
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCLIENTSIGNALSYNCEXTPROC) (EGLDisplay dpy, EGLSync sync, const EGLAttrib *attrib_list); /* Takes EGLSync and EGLAttrib, not the KHR-suffixed types used by the KHR sync sections. */
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglClientSignalSyncEXT (EGLDisplay dpy, EGLSync sync, const EGLAttrib *attrib_list);
+#endif
+#endif /* EGL_EXT_client_sync */
+
+#ifndef EGL_EXT_compositor
+#define EGL_EXT_compositor 1
+#define EGL_PRIMARY_COMPOSITOR_CONTEXT_EXT 0x3460
+#define EGL_EXTERNAL_REF_ID_EXT           0x3461
+#define EGL_COMPOSITOR_DROP_NEWEST_FRAME_EXT 0x3462
+#define EGL_COMPOSITOR_KEEP_NEWEST_FRAME_EXT 0x3463
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSETCONTEXTLISTEXTPROC) (const EGLint *external_ref_ids, EGLint num_entries); /* None of the compositor entry points takes an EGLDisplay; they are keyed by EGLint ids. */
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSETCONTEXTATTRIBUTESEXTPROC) (EGLint external_ref_id, const EGLint *context_attributes, EGLint num_entries);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSETWINDOWLISTEXTPROC) (EGLint external_ref_id, const EGLint *external_win_ids, EGLint num_entries);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSETWINDOWATTRIBUTESEXTPROC) (EGLint external_win_id, const EGLint *window_attributes, EGLint num_entries);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORBINDTEXWINDOWEXTPROC) (EGLint external_win_id);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSETSIZEEXTPROC) (EGLint external_win_id, EGLint width, EGLint height);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCOMPOSITORSWAPPOLICYEXTPROC) (EGLint external_win_id, EGLint policy);
+#ifdef EGL_EGLEXT_PROTOTYPES /* Direct prototypes are declared only when EGL_EGLEXT_PROTOTYPES is defined. */
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSetContextListEXT (const EGLint *external_ref_ids, EGLint num_entries);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSetContextAttributesEXT (EGLint external_ref_id, const EGLint *context_attributes, EGLint num_entries);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSetWindowListEXT (EGLint external_ref_id, const EGLint *external_win_ids, EGLint num_entries);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSetWindowAttributesEXT (EGLint external_win_id, const EGLint *window_attributes, EGLint num_entries);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorBindTexWindowEXT (EGLint external_win_id);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSetSizeEXT (EGLint external_win_id, EGLint width, EGLint height);
+EGLAPI EGLBoolean EGLAPIENTRY eglCompositorSwapPolicyEXT (EGLint external_win_id, EGLint policy);
+#endif
+#endif /* EGL_EXT_compositor */
+
+#ifndef EGL_EXT_config_select_group
+#define EGL_EXT_config_select_group 1
+#define EGL_CONFIG_SELECT_GROUP_EXT       0x34C0
+#endif /* EGL_EXT_config_select_group */
+
+#ifndef EGL_EXT_create_context_robustness
+#define EGL_EXT_create_context_robustness 1
+#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT 0x30BF
+#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_EXT 0x3138
+#define EGL_NO_RESET_NOTIFICATION_EXT     0x31BE
+#define EGL_LOSE_CONTEXT_ON_RESET_EXT     0x31BF
+#endif /* EGL_EXT_create_context_robustness */
+
+#ifndef EGL_EXT_device_base
+#define EGL_EXT_device_base 1
+typedef void *EGLDeviceEXT; /* Also used by the EGL_EXT_device_persistent_id section, which does not declare it itself. */
+#define EGL_NO_DEVICE_EXT                 EGL_CAST(EGLDeviceEXT,0)
+#define EGL_BAD_DEVICE_EXT                0x322B
+#define EGL_DEVICE_EXT                    0x322C
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEVICEATTRIBEXTPROC) (EGLDeviceEXT device, EGLint attribute, EGLAttrib *value);
+typedef const char *(EGLAPIENTRYP PFNEGLQUERYDEVICESTRINGEXTPROC) (EGLDeviceEXT device, EGLint name); /* Returns const char * rather than EGLBoolean. */
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEVICESEXTPROC) (EGLint max_devices, EGLDeviceEXT *devices, EGLint *num_devices);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDISPLAYATTRIBEXTPROC) (EGLDisplay dpy, EGLint attribute, EGLAttrib *value);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDeviceAttribEXT (EGLDeviceEXT device, EGLint attribute, EGLAttrib *value);
+EGLAPI const char *EGLAPIENTRY eglQueryDeviceStringEXT (EGLDeviceEXT device, EGLint name);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDevicesEXT (EGLint max_devices, EGLDeviceEXT *devices, EGLint *num_devices);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDisplayAttribEXT (EGLDisplay dpy, EGLint attribute, EGLAttrib *value);
+#endif
+#endif /* EGL_EXT_device_base */
+
+#ifndef EGL_EXT_device_drm
+#define EGL_EXT_device_drm 1
+#define EGL_DRM_DEVICE_FILE_EXT           0x3233
+#define EGL_DRM_MASTER_FD_EXT             0x333C
+#endif /* EGL_EXT_device_drm */
+
+#ifndef EGL_EXT_device_drm_render_node
+#define EGL_EXT_device_drm_render_node 1
+#define EGL_DRM_RENDER_NODE_FILE_EXT      0x3377
+#endif /* EGL_EXT_device_drm_render_node */
+
+#ifndef EGL_EXT_device_enumeration
+#define EGL_EXT_device_enumeration 1 /* Marker only: this section declares no tokens, types or entry points. */
+#endif /* EGL_EXT_device_enumeration */
+
+#ifndef EGL_EXT_device_openwf
+#define EGL_EXT_device_openwf 1
+#define EGL_OPENWF_DEVICE_ID_EXT          0x3237
+#define EGL_OPENWF_DEVICE_EXT             0x333D
+#endif /* EGL_EXT_device_openwf */
+
+#ifndef EGL_EXT_device_persistent_id
+#define EGL_EXT_device_persistent_id 1
+#define EGL_DEVICE_UUID_EXT               0x335C
+#define EGL_DRIVER_UUID_EXT               0x335D
+#define EGL_DRIVER_NAME_EXT               0x335E
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDEVICEBINARYEXTPROC) (EGLDeviceEXT device, EGLint name, EGLint max_size, void *value, EGLint *size); /* EGLDeviceEXT is typedef'd in the EGL_EXT_device_base section. */
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDeviceBinaryEXT (EGLDeviceEXT device, EGLint name, EGLint max_size, void *value, EGLint *size);
+#endif
+#endif /* EGL_EXT_device_persistent_id */
+
+#ifndef EGL_EXT_device_query
+#define EGL_EXT_device_query 1 /* Marker only: this section declares no tokens, types or entry points. */
+#endif /* EGL_EXT_device_query */
+
+#ifndef EGL_EXT_device_query_name
+#define EGL_EXT_device_query_name 1
+#define EGL_RENDERER_EXT                  0x335F
+#endif /* EGL_EXT_device_query_name */
+
+#ifndef EGL_EXT_explicit_device
+#define EGL_EXT_explicit_device 1 /* Marker only: this section declares no tokens, types or entry points. */
+#endif /* EGL_EXT_explicit_device */
+
+#ifndef EGL_EXT_gl_colorspace_bt2020_linear
+#define EGL_EXT_gl_colorspace_bt2020_linear 1 /* Each EGL_EXT_gl_colorspace_* section in this run defines exactly one token and no entry points. */
+#define EGL_GL_COLORSPACE_BT2020_LINEAR_EXT 0x333F
+#endif /* EGL_EXT_gl_colorspace_bt2020_linear */
+
+#ifndef EGL_EXT_gl_colorspace_bt2020_pq
+#define EGL_EXT_gl_colorspace_bt2020_pq 1
+#define EGL_GL_COLORSPACE_BT2020_PQ_EXT   0x3340
+#endif /* EGL_EXT_gl_colorspace_bt2020_pq */
+
+#ifndef EGL_EXT_gl_colorspace_display_p3
+#define EGL_EXT_gl_colorspace_display_p3 1
+#define EGL_GL_COLORSPACE_DISPLAY_P3_EXT  0x3363 /* One above the _LINEAR variant (0x3362) defined in the next section. */
+#endif /* EGL_EXT_gl_colorspace_display_p3 */
+
+#ifndef EGL_EXT_gl_colorspace_display_p3_linear
+#define EGL_EXT_gl_colorspace_display_p3_linear 1
+#define EGL_GL_COLORSPACE_DISPLAY_P3_LINEAR_EXT 0x3362
+#endif /* EGL_EXT_gl_colorspace_display_p3_linear */
+
+#ifndef EGL_EXT_gl_colorspace_display_p3_passthrough
+#define EGL_EXT_gl_colorspace_display_p3_passthrough 1
+#define EGL_GL_COLORSPACE_DISPLAY_P3_PASSTHROUGH_EXT 0x3490
+#endif /* EGL_EXT_gl_colorspace_display_p3_passthrough */
+
+#ifndef EGL_EXT_gl_colorspace_scrgb
+#define EGL_EXT_gl_colorspace_scrgb 1
+#define EGL_GL_COLORSPACE_SCRGB_EXT       0x3351 /* One above the _LINEAR variant (0x3350) defined in the next section. */
+#endif /* EGL_EXT_gl_colorspace_scrgb */
+
+#ifndef EGL_EXT_gl_colorspace_scrgb_linear
+#define EGL_EXT_gl_colorspace_scrgb_linear 1
+#define EGL_GL_COLORSPACE_SCRGB_LINEAR_EXT 0x3350
+#endif /* EGL_EXT_gl_colorspace_scrgb_linear */
+
+#ifndef EGL_EXT_image_dma_buf_import
+#define EGL_EXT_image_dma_buf_import 1 /* Defines FD/OFFSET/PITCH tokens for planes 0-2 only; plane 3 is in the _modifiers section below. */
+#define EGL_LINUX_DMA_BUF_EXT             0x3270
+#define EGL_LINUX_DRM_FOURCC_EXT          0x3271
+#define EGL_DMA_BUF_PLANE0_FD_EXT         0x3272
+#define EGL_DMA_BUF_PLANE0_OFFSET_EXT     0x3273
+#define EGL_DMA_BUF_PLANE0_PITCH_EXT      0x3274
+#define EGL_DMA_BUF_PLANE1_FD_EXT         0x3275
+#define EGL_DMA_BUF_PLANE1_OFFSET_EXT     0x3276
+#define EGL_DMA_BUF_PLANE1_PITCH_EXT      0x3277
+#define EGL_DMA_BUF_PLANE2_FD_EXT         0x3278
+#define EGL_DMA_BUF_PLANE2_OFFSET_EXT     0x3279
+#define EGL_DMA_BUF_PLANE2_PITCH_EXT      0x327A
+#define EGL_YUV_COLOR_SPACE_HINT_EXT      0x327B
+#define EGL_SAMPLE_RANGE_HINT_EXT         0x327C
+#define EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT 0x327D
+#define EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT 0x327E
+#define EGL_ITU_REC601_EXT                0x327F
+#define EGL_ITU_REC709_EXT                0x3280
+#define EGL_ITU_REC2020_EXT               0x3281
+#define EGL_YUV_FULL_RANGE_EXT            0x3282
+#define EGL_YUV_NARROW_RANGE_EXT          0x3283
+#define EGL_YUV_CHROMA_SITING_0_EXT       0x3284
+#define EGL_YUV_CHROMA_SITING_0_5_EXT     0x3285
+#endif /* EGL_EXT_image_dma_buf_import */
+
+#ifndef EGL_EXT_image_dma_buf_import_modifiers
+#define EGL_EXT_image_dma_buf_import_modifiers 1
+#define EGL_DMA_BUF_PLANE3_FD_EXT         0x3440
+#define EGL_DMA_BUF_PLANE3_OFFSET_EXT     0x3441
+#define EGL_DMA_BUF_PLANE3_PITCH_EXT      0x3442
+#define EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT 0x3443 /* Each of planes 0-3 gets a _LO/_HI token pair. */
+#define EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT 0x3444
+#define EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT 0x3445
+#define EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT 0x3446
+#define EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT 0x3447
+#define EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT 0x3448
+#define EGL_DMA_BUF_PLANE3_MODIFIER_LO_EXT 0x3449
+#define EGL_DMA_BUF_PLANE3_MODIFIER_HI_EXT 0x344A
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDMABUFFORMATSEXTPROC) (EGLDisplay dpy, EGLint max_formats, EGLint *formats, EGLint *num_formats);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDMABUFMODIFIERSEXTPROC) (EGLDisplay dpy, EGLint format, EGLint max_modifiers, EGLuint64KHR *modifiers, EGLBoolean *external_only, EGLint *num_modifiers); /* EGLuint64KHR is typedef'd in the EGL_KHR_stream section. */
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDmaBufFormatsEXT (EGLDisplay dpy, EGLint max_formats, EGLint *formats, EGLint *num_formats);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDmaBufModifiersEXT (EGLDisplay dpy, EGLint format, EGLint max_modifiers, EGLuint64KHR *modifiers, EGLBoolean *external_only, EGLint *num_modifiers);
+#endif
+#endif /* EGL_EXT_image_dma_buf_import_modifiers */
+
+#ifndef EGL_EXT_image_gl_colorspace
+#define EGL_EXT_image_gl_colorspace 1
+#define EGL_GL_COLORSPACE_DEFAULT_EXT     0x314D
+#endif /* EGL_EXT_image_gl_colorspace */
+
+#ifndef EGL_EXT_image_implicit_sync_control
+#define EGL_EXT_image_implicit_sync_control 1
+#define EGL_IMPORT_SYNC_TYPE_EXT          0x3470
+#define EGL_IMPORT_IMPLICIT_SYNC_EXT      0x3471
+#define EGL_IMPORT_EXPLICIT_SYNC_EXT      0x3472
+#endif /* EGL_EXT_image_implicit_sync_control */
+
+#ifndef EGL_EXT_multiview_window
+#define EGL_EXT_multiview_window 1
+#define EGL_MULTIVIEW_VIEW_COUNT_EXT      0x3134
+#endif /* EGL_EXT_multiview_window */
+
+#ifndef EGL_EXT_output_base
+#define EGL_EXT_output_base 1
+typedef void *EGLOutputLayerEXT; /* Both output handle types are plain void pointers. */
+typedef void *EGLOutputPortEXT;
+#define EGL_NO_OUTPUT_LAYER_EXT           EGL_CAST(EGLOutputLayerEXT,0)
+#define EGL_NO_OUTPUT_PORT_EXT            EGL_CAST(EGLOutputPortEXT,0)
+#define EGL_BAD_OUTPUT_LAYER_EXT          0x322D
+#define EGL_BAD_OUTPUT_PORT_EXT           0x322E
+#define EGL_SWAP_INTERVAL_EXT             0x322F
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETOUTPUTLAYERSEXTPROC) (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputLayerEXT *layers, EGLint max_layers, EGLint *num_layers);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETOUTPUTPORTSEXTPROC) (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputPortEXT *ports, EGLint max_ports, EGLint *num_ports);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLOUTPUTLAYERATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYOUTPUTLAYERATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib *value);
+typedef const char *(EGLAPIENTRYP PFNEGLQUERYOUTPUTLAYERSTRINGEXTPROC) (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint name); /* The two *String* queries return const char * rather than EGLBoolean. */
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLOUTPUTPORTATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYOUTPUTPORTATTRIBEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib *value);
+typedef const char *(EGLAPIENTRYP PFNEGLQUERYOUTPUTPORTSTRINGEXTPROC) (EGLDisplay dpy, EGLOutputPortEXT port, EGLint name);
+#ifdef EGL_EGLEXT_PROTOTYPES /* Direct prototypes are declared only when EGL_EGLEXT_PROTOTYPES is defined. */
+EGLAPI EGLBoolean EGLAPIENTRY eglGetOutputLayersEXT (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputLayerEXT *layers, EGLint max_layers, EGLint *num_layers);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetOutputPortsEXT (EGLDisplay dpy, const EGLAttrib *attrib_list, EGLOutputPortEXT *ports, EGLint max_ports, EGLint *num_ports);
+EGLAPI EGLBoolean EGLAPIENTRY eglOutputLayerAttribEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib value);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryOutputLayerAttribEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint attribute, EGLAttrib *value);
+EGLAPI const char *EGLAPIENTRY eglQueryOutputLayerStringEXT (EGLDisplay dpy, EGLOutputLayerEXT layer, EGLint name);
+EGLAPI EGLBoolean EGLAPIENTRY eglOutputPortAttribEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib value);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryOutputPortAttribEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint attribute, EGLAttrib *value);
+EGLAPI const char *EGLAPIENTRY eglQueryOutputPortStringEXT (EGLDisplay dpy, EGLOutputPortEXT port, EGLint name);
+#endif
+#endif /* EGL_EXT_output_base */
+
+#ifndef EGL_EXT_output_drm
+#define EGL_EXT_output_drm 1
+#define EGL_DRM_CRTC_EXT                  0x3234
+#define EGL_DRM_PLANE_EXT                 0x3235
+#define EGL_DRM_CONNECTOR_EXT             0x3236
+#endif /* EGL_EXT_output_drm */
+
+#ifndef EGL_EXT_output_openwf
+#define EGL_EXT_output_openwf 1
+#define EGL_OPENWF_PIPELINE_ID_EXT        0x3238
+#define EGL_OPENWF_PORT_ID_EXT            0x3239
+#endif /* EGL_EXT_output_openwf */
+
+#ifndef EGL_EXT_pixel_format_float
+#define EGL_EXT_pixel_format_float 1
+#define EGL_COLOR_COMPONENT_TYPE_EXT      0x3339
+#define EGL_COLOR_COMPONENT_TYPE_FIXED_EXT 0x333A
+#define EGL_COLOR_COMPONENT_TYPE_FLOAT_EXT 0x333B
+#endif /* EGL_EXT_pixel_format_float */
+
+#ifndef EGL_EXT_platform_base
+#define EGL_EXT_platform_base 1
+typedef EGLDisplay (EGLAPIENTRYP PFNEGLGETPLATFORMDISPLAYEXTPROC) (EGLenum platform, void *native_display, const EGLint *attrib_list);
+typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMWINDOWSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list);
+typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPLATFORMPIXMAPSURFACEEXTPROC) (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLDisplay EGLAPIENTRY eglGetPlatformDisplayEXT (EGLenum platform, void *native_display, const EGLint *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformWindowSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_window, const EGLint *attrib_list);
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePlatformPixmapSurfaceEXT (EGLDisplay dpy, EGLConfig config, void *native_pixmap, const EGLint *attrib_list);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_EXT_platform_base */
+
+#ifndef EGL_EXT_platform_device
+#define EGL_EXT_platform_device 1
+#define EGL_PLATFORM_DEVICE_EXT           0x313F
+#endif /* EGL_EXT_platform_device */
+
+#ifndef EGL_EXT_platform_wayland
+#define EGL_EXT_platform_wayland 1
+#define EGL_PLATFORM_WAYLAND_EXT          0x31D8
+#endif /* EGL_EXT_platform_wayland */
+
+#ifndef EGL_EXT_platform_x11
+#define EGL_EXT_platform_x11 1
+#define EGL_PLATFORM_X11_EXT              0x31D5
+#define EGL_PLATFORM_X11_SCREEN_EXT       0x31D6
+#endif /* EGL_EXT_platform_x11 */
+
+#ifndef EGL_EXT_platform_xcb
+#define EGL_EXT_platform_xcb 1
+#define EGL_PLATFORM_XCB_EXT              0x31DC
+#define EGL_PLATFORM_XCB_SCREEN_EXT       0x31DE
+#endif /* EGL_EXT_platform_xcb */
+
+#ifndef EGL_EXT_present_opaque
+#define EGL_EXT_present_opaque 1
+#define EGL_PRESENT_OPAQUE_EXT            0x31DF
+#endif /* EGL_EXT_present_opaque */
+
+#ifndef EGL_EXT_protected_content
+#define EGL_EXT_protected_content 1
+#define EGL_PROTECTED_CONTENT_EXT         0x32C0
+#endif /* EGL_EXT_protected_content */
+
+#ifndef EGL_EXT_protected_surface
+#define EGL_EXT_protected_surface 1
+#endif /* EGL_EXT_protected_surface */
+
+#ifndef EGL_EXT_stream_consumer_egloutput
+#define EGL_EXT_stream_consumer_egloutput 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMEROUTPUTEXTPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLOutputLayerEXT layer);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerOutputEXT (EGLDisplay dpy, EGLStreamKHR stream, EGLOutputLayerEXT layer);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_EXT_stream_consumer_egloutput */
+
+#ifndef EGL_EXT_surface_CTA861_3_metadata
+#define EGL_EXT_surface_CTA861_3_metadata 1
+#define EGL_CTA861_3_MAX_CONTENT_LIGHT_LEVEL_EXT 0x3360
+#define EGL_CTA861_3_MAX_FRAME_AVERAGE_LEVEL_EXT 0x3361
+#endif /* EGL_EXT_surface_CTA861_3_metadata */
+
+#ifndef EGL_EXT_surface_SMPTE2086_metadata
+#define EGL_EXT_surface_SMPTE2086_metadata 1
+#define EGL_SMPTE2086_DISPLAY_PRIMARY_RX_EXT 0x3341
+#define EGL_SMPTE2086_DISPLAY_PRIMARY_RY_EXT 0x3342
+#define EGL_SMPTE2086_DISPLAY_PRIMARY_GX_EXT 0x3343
+#define EGL_SMPTE2086_DISPLAY_PRIMARY_GY_EXT 0x3344
+#define EGL_SMPTE2086_DISPLAY_PRIMARY_BX_EXT 0x3345
+#define EGL_SMPTE2086_DISPLAY_PRIMARY_BY_EXT 0x3346
+#define EGL_SMPTE2086_WHITE_POINT_X_EXT   0x3347
+#define EGL_SMPTE2086_WHITE_POINT_Y_EXT   0x3348
+#define EGL_SMPTE2086_MAX_LUMINANCE_EXT   0x3349
+#define EGL_SMPTE2086_MIN_LUMINANCE_EXT   0x334A
+#define EGL_METADATA_SCALING_EXT          50000
+#endif /* EGL_EXT_surface_SMPTE2086_metadata */
+
+#ifndef EGL_EXT_surface_compression
+#define EGL_EXT_surface_compression 1
+#define EGL_SURFACE_COMPRESSION_EXT       0x34B0
+#define EGL_SURFACE_COMPRESSION_PLANE1_EXT 0x328E
+#define EGL_SURFACE_COMPRESSION_PLANE2_EXT 0x328F
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_NONE_EXT 0x34B1
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_DEFAULT_EXT 0x34B2
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_1BPC_EXT 0x34B4
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_2BPC_EXT 0x34B5
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_3BPC_EXT 0x34B6
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_4BPC_EXT 0x34B7
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_5BPC_EXT 0x34B8
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_6BPC_EXT 0x34B9
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_7BPC_EXT 0x34BA
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_8BPC_EXT 0x34BB
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_9BPC_EXT 0x34BC
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_10BPC_EXT 0x34BD
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_11BPC_EXT 0x34BE
+#define EGL_SURFACE_COMPRESSION_FIXED_RATE_12BPC_EXT 0x34BF
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSUPPORTEDCOMPRESSIONRATESEXTPROC) (EGLDisplay dpy, EGLConfig config, const EGLAttrib *attrib_list, EGLint *rates, EGLint rate_size, EGLint *num_rates);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglQuerySupportedCompressionRatesEXT (EGLDisplay dpy, EGLConfig config, const EGLAttrib *attrib_list, EGLint *rates, EGLint rate_size, EGLint *num_rates);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_EXT_surface_compression */
+
+#ifndef EGL_EXT_swap_buffers_with_damage
+#define EGL_EXT_swap_buffers_with_damage 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHDAMAGEEXTPROC) (EGLDisplay dpy, EGLSurface surface, const EGLint *rects, EGLint n_rects);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageEXT (EGLDisplay dpy, EGLSurface surface, const EGLint *rects, EGLint n_rects);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_EXT_swap_buffers_with_damage */
+
+#ifndef EGL_EXT_sync_reuse
+#define EGL_EXT_sync_reuse 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLUNSIGNALSYNCEXTPROC) (EGLDisplay dpy, EGLSync sync, const EGLAttrib *attrib_list);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglUnsignalSyncEXT (EGLDisplay dpy, EGLSync sync, const EGLAttrib *attrib_list);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_EXT_sync_reuse */
+
+#ifndef EGL_EXT_yuv_surface
+#define EGL_EXT_yuv_surface 1
+#define EGL_YUV_ORDER_EXT                 0x3301
+#define EGL_YUV_NUMBER_OF_PLANES_EXT      0x3311
+#define EGL_YUV_SUBSAMPLE_EXT             0x3312
+#define EGL_YUV_DEPTH_RANGE_EXT           0x3317
+#define EGL_YUV_CSC_STANDARD_EXT          0x330A
+#define EGL_YUV_PLANE_BPP_EXT             0x331A
+#define EGL_YUV_BUFFER_EXT                0x3300
+#define EGL_YUV_ORDER_YUV_EXT             0x3302
+#define EGL_YUV_ORDER_YVU_EXT             0x3303
+#define EGL_YUV_ORDER_YUYV_EXT            0x3304
+#define EGL_YUV_ORDER_UYVY_EXT            0x3305
+#define EGL_YUV_ORDER_YVYU_EXT            0x3306
+#define EGL_YUV_ORDER_VYUY_EXT            0x3307
+#define EGL_YUV_ORDER_AYUV_EXT            0x3308
+#define EGL_YUV_SUBSAMPLE_4_2_0_EXT       0x3313
+#define EGL_YUV_SUBSAMPLE_4_2_2_EXT       0x3314
+#define EGL_YUV_SUBSAMPLE_4_4_4_EXT       0x3315
+#define EGL_YUV_DEPTH_RANGE_LIMITED_EXT   0x3318
+#define EGL_YUV_DEPTH_RANGE_FULL_EXT      0x3319
+#define EGL_YUV_CSC_STANDARD_601_EXT      0x330B
+#define EGL_YUV_CSC_STANDARD_709_EXT      0x330C
+#define EGL_YUV_CSC_STANDARD_2020_EXT     0x330D
+#define EGL_YUV_PLANE_BPP_0_EXT           0x331B
+#define EGL_YUV_PLANE_BPP_8_EXT           0x331C
+#define EGL_YUV_PLANE_BPP_10_EXT          0x331D
+#endif /* EGL_EXT_yuv_surface */
+
+#ifndef EGL_HI_clientpixmap
+#define EGL_HI_clientpixmap 1
+struct EGLClientPixmapHI {
+    void  *pData;
+    EGLint iWidth;
+    EGLint iHeight;
+    EGLint iStride;
+};
+#define EGL_CLIENT_PIXMAP_POINTER_HI      0x8F74
+typedef EGLSurface (EGLAPIENTRYP PFNEGLCREATEPIXMAPSURFACEHIPROC) (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLSurface EGLAPIENTRY eglCreatePixmapSurfaceHI (EGLDisplay dpy, EGLConfig config, struct EGLClientPixmapHI *pixmap);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_HI_clientpixmap */
+
+#ifndef EGL_HI_colorformats
+#define EGL_HI_colorformats 1
+#define EGL_COLOR_FORMAT_HI               0x8F70
+#define EGL_COLOR_RGB_HI                  0x8F71
+#define EGL_COLOR_RGBA_HI                 0x8F72
+#define EGL_COLOR_ARGB_HI                 0x8F73
+#endif /* EGL_HI_colorformats */
+
+#ifndef EGL_IMG_context_priority
+#define EGL_IMG_context_priority 1
+#define EGL_CONTEXT_PRIORITY_LEVEL_IMG    0x3100
+#define EGL_CONTEXT_PRIORITY_HIGH_IMG     0x3101
+#define EGL_CONTEXT_PRIORITY_MEDIUM_IMG   0x3102
+#define EGL_CONTEXT_PRIORITY_LOW_IMG      0x3103
+#endif /* EGL_IMG_context_priority */
+
+#ifndef EGL_IMG_image_plane_attribs
+#define EGL_IMG_image_plane_attribs 1
+#define EGL_NATIVE_BUFFER_MULTIPLANE_SEPARATE_IMG 0x3105
+#define EGL_NATIVE_BUFFER_PLANE_OFFSET_IMG 0x3106
+#endif /* EGL_IMG_image_plane_attribs */
+
+#ifndef EGL_MESA_drm_image
+#define EGL_MESA_drm_image 1
+#define EGL_DRM_BUFFER_FORMAT_MESA        0x31D0
+#define EGL_DRM_BUFFER_USE_MESA           0x31D1
+#define EGL_DRM_BUFFER_FORMAT_ARGB32_MESA 0x31D2
+#define EGL_DRM_BUFFER_MESA               0x31D3
+#define EGL_DRM_BUFFER_STRIDE_MESA        0x31D4
+#define EGL_DRM_BUFFER_USE_SCANOUT_MESA   0x00000001
+#define EGL_DRM_BUFFER_USE_SHARE_MESA     0x00000002
+#define EGL_DRM_BUFFER_USE_CURSOR_MESA    0x00000004
+typedef EGLImageKHR (EGLAPIENTRYP PFNEGLCREATEDRMIMAGEMESAPROC) (EGLDisplay dpy, const EGLint *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDRMIMAGEMESAPROC) (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLImageKHR EGLAPIENTRY eglCreateDRMImageMESA (EGLDisplay dpy, const EGLint *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglExportDRMImageMESA (EGLDisplay dpy, EGLImageKHR image, EGLint *name, EGLint *handle, EGLint *stride);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_MESA_drm_image */
+
+#ifndef EGL_MESA_image_dma_buf_export
+#define EGL_MESA_image_dma_buf_export 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEQUERYMESAPROC) (EGLDisplay dpy, EGLImageKHR image, int *fourcc, int *num_planes, EGLuint64KHR *modifiers);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTDMABUFIMAGEMESAPROC) (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageQueryMESA (EGLDisplay dpy, EGLImageKHR image, int *fourcc, int *num_planes, EGLuint64KHR *modifiers);
+EGLAPI EGLBoolean EGLAPIENTRY eglExportDMABUFImageMESA (EGLDisplay dpy, EGLImageKHR image, int *fds, EGLint *strides, EGLint *offsets);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_MESA_image_dma_buf_export */
+
+#ifndef EGL_MESA_platform_gbm
+#define EGL_MESA_platform_gbm 1
+#define EGL_PLATFORM_GBM_MESA             0x31D7
+#endif /* EGL_MESA_platform_gbm */
+
+#ifndef EGL_MESA_platform_surfaceless
+#define EGL_MESA_platform_surfaceless 1
+#define EGL_PLATFORM_SURFACELESS_MESA     0x31DD
+#endif /* EGL_MESA_platform_surfaceless */
+
+#ifndef EGL_MESA_query_driver
+#define EGL_MESA_query_driver 1
+typedef char *(EGLAPIENTRYP PFNEGLGETDISPLAYDRIVERCONFIGPROC) (EGLDisplay dpy);
+typedef const char *(EGLAPIENTRYP PFNEGLGETDISPLAYDRIVERNAMEPROC) (EGLDisplay dpy);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI char *EGLAPIENTRY eglGetDisplayDriverConfig (EGLDisplay dpy);
+EGLAPI const char *EGLAPIENTRY eglGetDisplayDriverName (EGLDisplay dpy);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_MESA_query_driver */
+
+#ifndef EGL_NOK_swap_region
+#define EGL_NOK_swap_region 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGIONNOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegionNOK (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_NOK_swap_region */
+
+#ifndef EGL_NOK_swap_region2
+#define EGL_NOK_swap_region2 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSREGION2NOKPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegion2NOK (EGLDisplay dpy, EGLSurface surface, EGLint numRects, const EGLint *rects);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_NOK_swap_region2 */
+
+#ifndef EGL_NOK_texture_from_pixmap
+#define EGL_NOK_texture_from_pixmap 1
+#define EGL_Y_INVERTED_NOK                0x307F
+#endif /* EGL_NOK_texture_from_pixmap */
+
+#ifndef EGL_NV_3dvision_surface
+#define EGL_NV_3dvision_surface 1
+#define EGL_AUTO_STEREO_NV                0x3136
+#endif /* EGL_NV_3dvision_surface */
+
+#ifndef EGL_NV_context_priority_realtime
+#define EGL_NV_context_priority_realtime 1
+#define EGL_CONTEXT_PRIORITY_REALTIME_NV  0x3357
+#endif /* EGL_NV_context_priority_realtime */
+
+#ifndef EGL_NV_coverage_sample
+#define EGL_NV_coverage_sample 1
+#define EGL_COVERAGE_BUFFERS_NV           0x30E0
+#define EGL_COVERAGE_SAMPLES_NV           0x30E1
+#endif /* EGL_NV_coverage_sample */
+
+#ifndef EGL_NV_coverage_sample_resolve
+#define EGL_NV_coverage_sample_resolve 1
+#define EGL_COVERAGE_SAMPLE_RESOLVE_NV    0x3131
+#define EGL_COVERAGE_SAMPLE_RESOLVE_DEFAULT_NV 0x3132
+#define EGL_COVERAGE_SAMPLE_RESOLVE_NONE_NV 0x3133
+#endif /* EGL_NV_coverage_sample_resolve */
+
+#ifndef EGL_NV_cuda_event
+#define EGL_NV_cuda_event 1
+#define EGL_CUDA_EVENT_HANDLE_NV          0x323B
+#define EGL_SYNC_CUDA_EVENT_NV            0x323C
+#define EGL_SYNC_CUDA_EVENT_COMPLETE_NV   0x323D
+#endif /* EGL_NV_cuda_event */
+
+#ifndef EGL_NV_depth_nonlinear
+#define EGL_NV_depth_nonlinear 1
+#define EGL_DEPTH_ENCODING_NV             0x30E2
+#define EGL_DEPTH_ENCODING_NONE_NV        0
+#define EGL_DEPTH_ENCODING_NONLINEAR_NV   0x30E3
+#endif /* EGL_NV_depth_nonlinear */
+
+#ifndef EGL_NV_device_cuda
+#define EGL_NV_device_cuda 1
+#define EGL_CUDA_DEVICE_NV                0x323A
+#endif /* EGL_NV_device_cuda */
+
+#ifndef EGL_NV_native_query
+#define EGL_NV_native_query 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEDISPLAYNVPROC) (EGLDisplay dpy, EGLNativeDisplayType *display_id);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEWINDOWNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYNATIVEPIXMAPNVPROC) (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeDisplayNV (EGLDisplay dpy, EGLNativeDisplayType *display_id);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativeWindowNV (EGLDisplay dpy, EGLSurface surf, EGLNativeWindowType *window);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryNativePixmapNV (EGLDisplay dpy, EGLSurface surf, EGLNativePixmapType *pixmap);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_NV_native_query */
+
+#ifndef EGL_NV_post_convert_rounding
+#define EGL_NV_post_convert_rounding 1
+#endif /* EGL_NV_post_convert_rounding */
+
+#ifndef EGL_NV_post_sub_buffer
+#define EGL_NV_post_sub_buffer 1
+#define EGL_POST_SUB_BUFFER_SUPPORTED_NV  0x30BE
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLPOSTSUBBUFFERNVPROC) (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglPostSubBufferNV (EGLDisplay dpy, EGLSurface surface, EGLint x, EGLint y, EGLint width, EGLint height);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_NV_post_sub_buffer */
+
+#ifndef EGL_NV_quadruple_buffer
+#define EGL_NV_quadruple_buffer 1
+#define EGL_QUADRUPLE_BUFFER_NV           0x3231
+#endif /* EGL_NV_quadruple_buffer */
+
+#ifndef EGL_NV_robustness_video_memory_purge
+#define EGL_NV_robustness_video_memory_purge 1
+#define EGL_GENERATE_RESET_ON_VIDEO_MEMORY_PURGE_NV 0x334C
+#endif /* EGL_NV_robustness_video_memory_purge */
+
+#ifndef EGL_NV_stream_consumer_eglimage
+#define EGL_NV_stream_consumer_eglimage 1
+#define EGL_STREAM_CONSUMER_IMAGE_NV      0x3373
+#define EGL_STREAM_IMAGE_ADD_NV           0x3374
+#define EGL_STREAM_IMAGE_REMOVE_NV        0x3375
+#define EGL_STREAM_IMAGE_AVAILABLE_NV     0x3376
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMIMAGECONSUMERCONNECTNVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLint num_modifiers, const EGLuint64KHR *modifiers, const EGLAttrib *attrib_list);
+typedef EGLint (EGLAPIENTRYP PFNEGLQUERYSTREAMCONSUMEREVENTNVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLTime timeout, EGLenum *event, EGLAttrib *aux);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMACQUIREIMAGENVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLImage *pImage, EGLSync sync);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMRELEASEIMAGENVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLImage image, EGLSync sync);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamImageConsumerConnectNV (EGLDisplay dpy, EGLStreamKHR stream, EGLint num_modifiers, const EGLuint64KHR *modifiers, const EGLAttrib *attrib_list);
+EGLAPI EGLint EGLAPIENTRY eglQueryStreamConsumerEventNV (EGLDisplay dpy, EGLStreamKHR stream, EGLTime timeout, EGLenum *event, EGLAttrib *aux);
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamAcquireImageNV (EGLDisplay dpy, EGLStreamKHR stream, EGLImage *pImage, EGLSync sync);
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamReleaseImageNV (EGLDisplay dpy, EGLStreamKHR stream, EGLImage image, EGLSync sync);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_NV_stream_consumer_eglimage */
+
+#ifndef EGL_NV_stream_consumer_gltexture_yuv
+#define EGL_NV_stream_consumer_gltexture_yuv 1
+#define EGL_YUV_PLANE0_TEXTURE_UNIT_NV    0x332C
+#define EGL_YUV_PLANE1_TEXTURE_UNIT_NV    0x332D
+#define EGL_YUV_PLANE2_TEXTURE_UNIT_NV    0x332E
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALATTRIBSNVPROC) (EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerGLTextureExternalAttribsNV (EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_NV_stream_consumer_gltexture_yuv */
+
+#ifndef EGL_NV_stream_cross_display
+#define EGL_NV_stream_cross_display 1
+#define EGL_STREAM_CROSS_DISPLAY_NV       0x334E
+#endif /* EGL_NV_stream_cross_display */
+
+#ifndef EGL_NV_stream_cross_object
+#define EGL_NV_stream_cross_object 1
+#define EGL_STREAM_CROSS_OBJECT_NV        0x334D
+#endif /* EGL_NV_stream_cross_object */
+
+#ifndef EGL_NV_stream_cross_partition
+#define EGL_NV_stream_cross_partition 1
+#define EGL_STREAM_CROSS_PARTITION_NV     0x323F
+#endif /* EGL_NV_stream_cross_partition */
+
+#ifndef EGL_NV_stream_cross_process
+#define EGL_NV_stream_cross_process 1
+#define EGL_STREAM_CROSS_PROCESS_NV       0x3245
+#endif /* EGL_NV_stream_cross_process */
+
+#ifndef EGL_NV_stream_cross_system
+#define EGL_NV_stream_cross_system 1
+#define EGL_STREAM_CROSS_SYSTEM_NV        0x334F
+#endif /* EGL_NV_stream_cross_system */
+
+#ifndef EGL_NV_stream_dma
+#define EGL_NV_stream_dma 1
+#define EGL_STREAM_DMA_NV                 0x3371
+#define EGL_STREAM_DMA_SERVER_NV          0x3372
+#endif /* EGL_NV_stream_dma */
+
+#ifndef EGL_NV_stream_fifo_next
+#define EGL_NV_stream_fifo_next 1
+#define EGL_PENDING_FRAME_NV              0x3329
+#define EGL_STREAM_TIME_PENDING_NV        0x332A
+#endif /* EGL_NV_stream_fifo_next */
+
+#ifndef EGL_NV_stream_fifo_synchronous
+#define EGL_NV_stream_fifo_synchronous 1
+#define EGL_STREAM_FIFO_SYNCHRONOUS_NV    0x3336
+#endif /* EGL_NV_stream_fifo_synchronous */
+
+#ifndef EGL_NV_stream_flush
+#define EGL_NV_stream_flush 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMFLUSHNVPROC) (EGLDisplay dpy, EGLStreamKHR stream);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamFlushNV (EGLDisplay dpy, EGLStreamKHR stream);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_NV_stream_flush */
+
+#ifndef EGL_NV_stream_frame_limits
+#define EGL_NV_stream_frame_limits 1
+#define EGL_PRODUCER_MAX_FRAME_HINT_NV    0x3337
+#define EGL_CONSUMER_MAX_FRAME_HINT_NV    0x3338
+#endif /* EGL_NV_stream_frame_limits */
+
+#ifndef EGL_NV_stream_metadata
+#define EGL_NV_stream_metadata 1
+#define EGL_MAX_STREAM_METADATA_BLOCKS_NV 0x3250
+#define EGL_MAX_STREAM_METADATA_BLOCK_SIZE_NV 0x3251
+#define EGL_MAX_STREAM_METADATA_TOTAL_SIZE_NV 0x3252
+#define EGL_PRODUCER_METADATA_NV          0x3253
+#define EGL_CONSUMER_METADATA_NV          0x3254
+#define EGL_PENDING_METADATA_NV           0x3328
+#define EGL_METADATA0_SIZE_NV             0x3255
+#define EGL_METADATA1_SIZE_NV             0x3256
+#define EGL_METADATA2_SIZE_NV             0x3257
+#define EGL_METADATA3_SIZE_NV             0x3258
+#define EGL_METADATA0_TYPE_NV             0x3259
+#define EGL_METADATA1_TYPE_NV             0x325A
+#define EGL_METADATA2_TYPE_NV             0x325B
+#define EGL_METADATA3_TYPE_NV             0x325C
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDISPLAYATTRIBNVPROC) (EGLDisplay dpy, EGLint attribute, EGLAttrib *value);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSETSTREAMMETADATANVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLint n, EGLint offset, EGLint size, const void *data);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYSTREAMMETADATANVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum name, EGLint n, EGLint offset, EGLint size, void *data);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDisplayAttribNV (EGLDisplay dpy, EGLint attribute, EGLAttrib *value);
+EGLAPI EGLBoolean EGLAPIENTRY eglSetStreamMetadataNV (EGLDisplay dpy, EGLStreamKHR stream, EGLint n, EGLint offset, EGLint size, const void *data);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryStreamMetadataNV (EGLDisplay dpy, EGLStreamKHR stream, EGLenum name, EGLint n, EGLint offset, EGLint size, void *data);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_NV_stream_metadata */
+
+#ifndef EGL_NV_stream_origin
+#define EGL_NV_stream_origin 1
+#define EGL_STREAM_FRAME_ORIGIN_X_NV      0x3366
+#define EGL_STREAM_FRAME_ORIGIN_Y_NV      0x3367
+#define EGL_STREAM_FRAME_MAJOR_AXIS_NV    0x3368
+#define EGL_CONSUMER_AUTO_ORIENTATION_NV  0x3369
+#define EGL_PRODUCER_AUTO_ORIENTATION_NV  0x336A
+#define EGL_LEFT_NV                       0x336B
+#define EGL_RIGHT_NV                      0x336C
+#define EGL_TOP_NV                        0x336D
+#define EGL_BOTTOM_NV                     0x336E
+#define EGL_X_AXIS_NV                     0x336F
+#define EGL_Y_AXIS_NV                     0x3370
+#endif /* EGL_NV_stream_origin */
+
+#ifndef EGL_NV_stream_remote
+#define EGL_NV_stream_remote 1
+#define EGL_STREAM_STATE_INITIALIZING_NV  0x3240
+#define EGL_STREAM_TYPE_NV                0x3241
+#define EGL_STREAM_PROTOCOL_NV            0x3242
+#define EGL_STREAM_ENDPOINT_NV            0x3243
+#define EGL_STREAM_LOCAL_NV               0x3244
+#define EGL_STREAM_PRODUCER_NV            0x3247
+#define EGL_STREAM_CONSUMER_NV            0x3248
+#define EGL_STREAM_PROTOCOL_FD_NV         0x3246
+#endif /* EGL_NV_stream_remote */
+
+#ifndef EGL_NV_stream_reset
+#define EGL_NV_stream_reset 1
+#define EGL_SUPPORT_RESET_NV              0x3334
+#define EGL_SUPPORT_REUSE_NV              0x3335
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLRESETSTREAMNVPROC) (EGLDisplay dpy, EGLStreamKHR stream);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglResetStreamNV (EGLDisplay dpy, EGLStreamKHR stream);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_NV_stream_reset */
+
+#ifndef EGL_NV_stream_socket
+#define EGL_NV_stream_socket 1
+#define EGL_STREAM_PROTOCOL_SOCKET_NV     0x324B
+#define EGL_SOCKET_HANDLE_NV              0x324C
+#define EGL_SOCKET_TYPE_NV                0x324D
+#endif /* EGL_NV_stream_socket */
+
+#ifndef EGL_NV_stream_socket_inet
+#define EGL_NV_stream_socket_inet 1
+#define EGL_SOCKET_TYPE_INET_NV           0x324F
+#endif /* EGL_NV_stream_socket_inet */
+
+#ifndef EGL_NV_stream_socket_unix
+#define EGL_NV_stream_socket_unix 1
+#define EGL_SOCKET_TYPE_UNIX_NV           0x324E
+#endif /* EGL_NV_stream_socket_unix */
+
+#ifndef EGL_NV_stream_sync
+#define EGL_NV_stream_sync 1
+#define EGL_SYNC_NEW_FRAME_NV             0x321F
+typedef EGLSyncKHR (EGLAPIENTRYP PFNEGLCREATESTREAMSYNCNVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLSyncKHR EGLAPIENTRY eglCreateStreamSyncNV (EGLDisplay dpy, EGLStreamKHR stream, EGLenum type, const EGLint *attrib_list);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_NV_stream_sync */
+
+#ifndef EGL_NV_sync
+#define EGL_NV_sync 1
+typedef void *EGLSyncNV;
+typedef khronos_utime_nanoseconds_t EGLTimeNV;
+#ifdef KHRONOS_SUPPORT_INT64
+#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE_NV 0x30E6
+#define EGL_SYNC_STATUS_NV                0x30E7
+#define EGL_SIGNALED_NV                   0x30E8
+#define EGL_UNSIGNALED_NV                 0x30E9
+#define EGL_SYNC_FLUSH_COMMANDS_BIT_NV    0x0001
+#define EGL_FOREVER_NV                    0xFFFFFFFFFFFFFFFFull
+#define EGL_ALREADY_SIGNALED_NV           0x30EA
+#define EGL_TIMEOUT_EXPIRED_NV            0x30EB
+#define EGL_CONDITION_SATISFIED_NV        0x30EC
+#define EGL_SYNC_TYPE_NV                  0x30ED
+#define EGL_SYNC_CONDITION_NV             0x30EE
+#define EGL_SYNC_FENCE_NV                 0x30EF
+#define EGL_NO_SYNC_NV                    EGL_CAST(EGLSyncNV,0)
+typedef EGLSyncNV (EGLAPIENTRYP PFNEGLCREATEFENCESYNCNVPROC) (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLDESTROYSYNCNVPROC) (EGLSyncNV sync);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLFENCENVPROC) (EGLSyncNV sync);
+typedef EGLint (EGLAPIENTRYP PFNEGLCLIENTWAITSYNCNVPROC) (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSIGNALSYNCNVPROC) (EGLSyncNV sync, EGLenum mode);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCATTRIBNVPROC) (EGLSyncNV sync, EGLint attribute, EGLint *value);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLSyncNV EGLAPIENTRY eglCreateFenceSyncNV (EGLDisplay dpy, EGLenum condition, const EGLint *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglDestroySyncNV (EGLSyncNV sync);
+EGLAPI EGLBoolean EGLAPIENTRY eglFenceNV (EGLSyncNV sync);
+EGLAPI EGLint EGLAPIENTRY eglClientWaitSyncNV (EGLSyncNV sync, EGLint flags, EGLTimeNV timeout);
+EGLAPI EGLBoolean EGLAPIENTRY eglSignalSyncNV (EGLSyncNV sync, EGLenum mode);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncAttribNV (EGLSyncNV sync, EGLint attribute, EGLint *value);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* KHRONOS_SUPPORT_INT64 */
+#endif /* EGL_NV_sync */
+
+#ifndef EGL_NV_system_time
+#define EGL_NV_system_time 1
+typedef khronos_utime_nanoseconds_t EGLuint64NV;
+#ifdef KHRONOS_SUPPORT_INT64
+typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMEFREQUENCYNVPROC) (void);
+typedef EGLuint64NV (EGLAPIENTRYP PFNEGLGETSYSTEMTIMENVPROC) (void);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeFrequencyNV (void);
+EGLAPI EGLuint64NV EGLAPIENTRY eglGetSystemTimeNV (void);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* KHRONOS_SUPPORT_INT64 */
+#endif /* EGL_NV_system_time */
+
+#ifndef EGL_NV_triple_buffer
+#define EGL_NV_triple_buffer 1
+#define EGL_TRIPLE_BUFFER_NV              0x3230
+#endif /* EGL_NV_triple_buffer */
+
+#ifndef EGL_TIZEN_image_native_buffer
+#define EGL_TIZEN_image_native_buffer 1
+#define EGL_NATIVE_BUFFER_TIZEN           0x32A0
+#endif /* EGL_TIZEN_image_native_buffer */
+
+#ifndef EGL_TIZEN_image_native_surface
+#define EGL_TIZEN_image_native_surface 1
+#define EGL_NATIVE_SURFACE_TIZEN          0x32A1
+#endif /* EGL_TIZEN_image_native_surface */
+
+#ifndef EGL_WL_bind_wayland_display
+#define EGL_WL_bind_wayland_display 1
+#define PFNEGLBINDWAYLANDDISPLAYWL PFNEGLBINDWAYLANDDISPLAYWLPROC /* these three macros alias the un-suffixed names to the ...PROC typedefs below */
+#define PFNEGLUNBINDWAYLANDDISPLAYWL PFNEGLUNBINDWAYLANDDISPLAYWLPROC
+#define PFNEGLQUERYWAYLANDBUFFERWL PFNEGLQUERYWAYLANDBUFFERWLPROC
+struct wl_display; /* forward declaration: only used through pointers in this block */
+struct wl_resource; /* forward declaration: only used through pointers in this block */
+#define EGL_WAYLAND_BUFFER_WL             0x31D5
+#define EGL_WAYLAND_PLANE_WL              0x31D6
+#define EGL_TEXTURE_Y_U_V_WL              0x31D7
+#define EGL_TEXTURE_Y_UV_WL               0x31D8
+#define EGL_TEXTURE_Y_XUXV_WL             0x31D9
+#define EGL_TEXTURE_EXTERNAL_WL           0x31DA
+#define EGL_WAYLAND_Y_INVERTED_WL         0x31DB
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLBINDWAYLANDDISPLAYWLPROC) (EGLDisplay dpy, struct wl_display *display);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLUNBINDWAYLANDDISPLAYWLPROC) (EGLDisplay dpy, struct wl_display *display);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYWAYLANDBUFFERWLPROC) (EGLDisplay dpy, struct wl_resource *buffer, EGLint attribute, EGLint *value);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglBindWaylandDisplayWL (EGLDisplay dpy, struct wl_display *display);
+EGLAPI EGLBoolean EGLAPIENTRY eglUnbindWaylandDisplayWL (EGLDisplay dpy, struct wl_display *display);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryWaylandBufferWL (EGLDisplay dpy, struct wl_resource *buffer, EGLint attribute, EGLint *value);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_WL_bind_wayland_display */
+
+#ifndef EGL_WL_create_wayland_buffer_from_image
+#define EGL_WL_create_wayland_buffer_from_image 1
+#define PFNEGLCREATEWAYLANDBUFFERFROMIMAGEWL PFNEGLCREATEWAYLANDBUFFERFROMIMAGEWLPROC
+struct wl_buffer; /* forward declaration: only used through pointers in this block */
+typedef struct wl_buffer *(EGLAPIENTRYP PFNEGLCREATEWAYLANDBUFFERFROMIMAGEWLPROC) (EGLDisplay dpy, EGLImageKHR image);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI struct wl_buffer *EGLAPIENTRY eglCreateWaylandBufferFromImageWL (EGLDisplay dpy, EGLImageKHR image);
+#endif /* EGL_EGLEXT_PROTOTYPES */
+#endif /* EGL_WL_create_wayland_buffer_from_image */
+
+/* ANGLE EGL extensions */
+#include "eglext_angle.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/thirdparty/angle/include/EGL/eglext_angle.h b/thirdparty/angle/include/EGL/eglext_angle.h
new file mode 100644
index 0000000000..4f3f3264ba
--- /dev/null
+++ b/thirdparty/angle/include/EGL/eglext_angle.h
@@ -0,0 +1,428 @@
+//
+// Copyright 2017 The ANGLE Project Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// eglext_angle.h: ANGLE modifications to the eglext.h header file.
+//   Currently we don't include this file directly, we patch eglext.h
+//   to include it implicitly so it is visible throughout our code.
+
+#ifndef INCLUDE_EGL_EGLEXT_ANGLE_
+#define INCLUDE_EGL_EGLEXT_ANGLE_
+
+// clang-format off
+
+#ifndef EGL_ANGLE_robust_resource_initialization
+#define EGL_ANGLE_robust_resource_initialization 1
+#define EGL_ROBUST_RESOURCE_INITIALIZATION_ANGLE 0x3453
+#endif /* EGL_ANGLE_robust_resource_initialization */
+
+#ifndef EGL_ANGLE_keyed_mutex
+#define EGL_ANGLE_keyed_mutex 1
+#define EGL_DXGI_KEYED_MUTEX_ANGLE        0x33A2
+#endif /* EGL_ANGLE_keyed_mutex */
+
+#ifndef EGL_ANGLE_d3d_texture_client_buffer
+#define EGL_ANGLE_d3d_texture_client_buffer 1
+#define EGL_D3D_TEXTURE_ANGLE 0x33A3
+#define EGL_TEXTURE_OFFSET_X_ANGLE 0x3490
+#define EGL_TEXTURE_OFFSET_Y_ANGLE 0x3491
+#define EGL_D3D11_TEXTURE_PLANE_ANGLE 0x3492
+#define EGL_D3D11_TEXTURE_ARRAY_SLICE_ANGLE 0x3493
+#endif /* EGL_ANGLE_d3d_texture_client_buffer */
+
+#ifndef EGL_ANGLE_software_display
+#define EGL_ANGLE_software_display 1
+#define EGL_SOFTWARE_DISPLAY_ANGLE ((EGLNativeDisplayType)-1)
+#endif /* EGL_ANGLE_software_display */
+
+#ifndef EGL_ANGLE_direct3d_display
+#define EGL_ANGLE_direct3d_display 1
+#define EGL_D3D11_ELSE_D3D9_DISPLAY_ANGLE ((EGLNativeDisplayType)-2)
+#define EGL_D3D11_ONLY_DISPLAY_ANGLE ((EGLNativeDisplayType)-3)
+#endif /* EGL_ANGLE_direct3d_display */
+
+#ifndef EGL_ANGLE_direct_composition
+#define EGL_ANGLE_direct_composition 1
+#define EGL_DIRECT_COMPOSITION_ANGLE 0x33A5
+#endif /* EGL_ANGLE_direct_composition */
+
+#ifndef EGL_ANGLE_platform_angle
+#define EGL_ANGLE_platform_angle 1
+#define EGL_PLATFORM_ANGLE_ANGLE          0x3202
+#define EGL_PLATFORM_ANGLE_TYPE_ANGLE     0x3203
+#define EGL_PLATFORM_ANGLE_MAX_VERSION_MAJOR_ANGLE 0x3204
+#define EGL_PLATFORM_ANGLE_MAX_VERSION_MINOR_ANGLE 0x3205
+#define EGL_PLATFORM_ANGLE_TYPE_DEFAULT_ANGLE 0x3206
+#define EGL_PLATFORM_ANGLE_DEBUG_LAYERS_ENABLED_ANGLE 0x3451
+#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_ANGLE 0x3209
+#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_HARDWARE_ANGLE 0x320A
+#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_NULL_ANGLE 0x345E
+#define EGL_PLATFORM_ANGLE_NATIVE_PLATFORM_TYPE_ANGLE 0x348F
+#endif /* EGL_ANGLE_platform_angle */
+
+#ifndef EGL_ANGLE_platform_angle_d3d
+#define EGL_ANGLE_platform_angle_d3d 1
+#define EGL_PLATFORM_ANGLE_TYPE_D3D9_ANGLE 0x3207
+#define EGL_PLATFORM_ANGLE_TYPE_D3D11_ANGLE 0x3208
+#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_D3D_WARP_ANGLE 0x320B
+#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_D3D_REFERENCE_ANGLE 0x320C
+#define EGL_PLATFORM_ANGLE_ENABLE_AUTOMATIC_TRIM_ANGLE 0x320F
+#endif /* EGL_ANGLE_platform_angle_d3d */
+
+#ifndef EGL_ANGLE_platform_angle_d3d_luid
+#define EGL_ANGLE_platform_angle_d3d_luid 1
+#define EGL_PLATFORM_ANGLE_D3D_LUID_HIGH_ANGLE 0x34A0
+#define EGL_PLATFORM_ANGLE_D3D_LUID_LOW_ANGLE 0x34A1
+#endif /* EGL_ANGLE_platform_angle_d3d_luid */
+
+#ifndef EGL_ANGLE_platform_angle_d3d11on12
+#define EGL_ANGLE_platform_angle_d3d11on12 1
+#define EGL_PLATFORM_ANGLE_D3D11ON12_ANGLE 0x3488
+#endif /* EGL_ANGLE_platform_angle_d3d11on12 */
+
+#ifndef EGL_ANGLE_platform_angle_opengl
+#define EGL_ANGLE_platform_angle_opengl 1
+#define EGL_PLATFORM_ANGLE_TYPE_OPENGL_ANGLE 0x320D
+#define EGL_PLATFORM_ANGLE_TYPE_OPENGLES_ANGLE 0x320E
+#define EGL_PLATFORM_ANGLE_EGL_HANDLE_ANGLE 0x3480
+#endif /* EGL_ANGLE_platform_angle_opengl */
+
+#ifndef EGL_ANGLE_platform_angle_null
+#define EGL_ANGLE_platform_angle_null 1
+#define EGL_PLATFORM_ANGLE_TYPE_NULL_ANGLE 0x33AE
+#endif /* EGL_ANGLE_platform_angle_null */
+
+#ifndef EGL_ANGLE_platform_angle_vulkan
+#define EGL_ANGLE_platform_angle_vulkan 1
+#define EGL_PLATFORM_ANGLE_TYPE_VULKAN_ANGLE 0x3450
+#define EGL_PLATFORM_VULKAN_DISPLAY_MODE_SIMPLE_ANGLE 0x34A4
+#define EGL_PLATFORM_VULKAN_DISPLAY_MODE_HEADLESS_ANGLE 0x34A5
+#endif /* EGL_ANGLE_platform_angle_vulkan */
+
+#ifndef EGL_ANGLE_platform_angle_metal
+#define EGL_ANGLE_platform_angle_metal 1
+#define EGL_PLATFORM_ANGLE_TYPE_METAL_ANGLE 0x3489
+#endif /* EGL_ANGLE_platform_angle_metal  */
+
+#ifndef EGL_ANGLE_platform_angle_device_type_swiftshader
+#define EGL_ANGLE_platform_angle_device_type_swiftshader 1
+#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_SWIFTSHADER_ANGLE 0x3487
+#endif /* EGL_ANGLE_platform_angle_device_type_swiftshader */
+
+#ifndef EGL_ANGLE_platform_angle_device_type_egl_angle
+#define EGL_ANGLE_platform_angle_device_type_egl_angle 1
+#define EGL_PLATFORM_ANGLE_DEVICE_TYPE_EGL_ANGLE 0x348E
+#endif /* EGL_ANGLE_platform_angle_device_type_egl_angle */
+
+#ifndef EGL_ANGLE_context_virtualization
+#define EGL_ANGLE_context_virtualization 1
+#define EGL_CONTEXT_VIRTUALIZATION_GROUP_ANGLE 0x3481
+#endif /* EGL_ANGLE_context_virtualization */
+
+#ifndef EGL_ANGLE_platform_angle_device_context_volatile_eagl
+#define EGL_ANGLE_platform_angle_device_context_volatile_eagl 1
+#define EGL_PLATFORM_ANGLE_DEVICE_CONTEXT_VOLATILE_EAGL_ANGLE 0x34A2
+#endif /* EGL_ANGLE_platform_angle_device_context_volatile_eagl */
+
+#ifndef EGL_ANGLE_platform_angle_device_context_volatile_cgl
+#define EGL_ANGLE_platform_angle_device_context_volatile_cgl 1
+#define EGL_PLATFORM_ANGLE_DEVICE_CONTEXT_VOLATILE_CGL_ANGLE 0x34A3
+#endif /* EGL_ANGLE_platform_angle_device_context_volatile_cgl */
+
+#ifndef EGL_ANGLE_platform_angle_device_id
+#define EGL_ANGLE_platform_angle_device_id 1
+#define EGL_PLATFORM_ANGLE_DEVICE_ID_HIGH_ANGLE 0x34D6
+#define EGL_PLATFORM_ANGLE_DEVICE_ID_LOW_ANGLE 0x34D7
+#define EGL_PLATFORM_ANGLE_DISPLAY_KEY_ANGLE 0x34DC
+#endif /* EGL_ANGLE_platform_angle_device_id */
+
+#ifndef EGL_ANGLE_x11_visual
+#define EGL_ANGLE_x11_visual 1
+#define EGL_X11_VISUAL_ID_ANGLE 0x33A3
+#endif /* EGL_ANGLE_x11_visual */
+
+#ifndef EGL_ANGLE_surface_orientation
+#define EGL_ANGLE_surface_orientation 1
+#define EGL_OPTIMAL_SURFACE_ORIENTATION_ANGLE 0x33A7
+#define EGL_SURFACE_ORIENTATION_ANGLE 0x33A8
+#define EGL_SURFACE_ORIENTATION_INVERT_X_ANGLE 0x0001
+#define EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE 0x0002
+#endif /* EGL_ANGLE_surface_orientation */
+
+#ifndef EGL_ANGLE_experimental_present_path
+#define EGL_ANGLE_experimental_present_path 1
+#define EGL_EXPERIMENTAL_PRESENT_PATH_ANGLE 0x33A4
+#define EGL_EXPERIMENTAL_PRESENT_PATH_FAST_ANGLE 0x33A9
+#define EGL_EXPERIMENTAL_PRESENT_PATH_COPY_ANGLE 0x33AA
+#endif /* EGL_ANGLE_experimental_present_path */
+
+#ifndef EGL_ANGLE_stream_producer_d3d_texture
+#define EGL_ANGLE_stream_producer_d3d_texture 1
+#define EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE 0x33AB
+typedef EGLBoolean(EGLAPIENTRYP PFNEGLCREATESTREAMPRODUCERD3DTEXTUREANGLEPROC)(EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
+typedef EGLBoolean(EGLAPIENTRYP PFNEGLSTREAMPOSTD3DTEXTUREANGLEPROC)(EGLDisplay dpy, EGLStreamKHR stream, void *texture, const EGLAttrib *attrib_list);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglCreateStreamProducerD3DTextureANGLE(EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamPostD3DTextureANGLE(EGLDisplay dpy, EGLStreamKHR stream, void *texture, const EGLAttrib *attrib_list);
+#endif
+#endif /* EGL_ANGLE_stream_producer_d3d_texture */
+
+#ifndef EGL_ANGLE_create_context_webgl_compatibility
+#define EGL_ANGLE_create_context_webgl_compatibility 1
+#define EGL_CONTEXT_WEBGL_COMPATIBILITY_ANGLE 0x33AC
+#endif /* EGL_ANGLE_create_context_webgl_compatibility */
+
+#ifndef EGL_ANGLE_display_texture_share_group
+#define EGL_ANGLE_display_texture_share_group 1
+#define EGL_DISPLAY_TEXTURE_SHARE_GROUP_ANGLE 0x33AF
+#endif /* EGL_ANGLE_display_texture_share_group */
+
+#ifndef EGL_CHROMIUM_create_context_bind_generates_resource
+#define EGL_CHROMIUM_create_context_bind_generates_resource 1
+#define EGL_CONTEXT_BIND_GENERATES_RESOURCE_CHROMIUM 0x33AD
+#endif /* EGL_CHROMIUM_create_context_bind_generates_resource */
+
+#ifndef EGL_ANGLE_metal_create_context_ownership_identity
+#define EGL_ANGLE_metal_create_context_ownership_identity 1
+#define EGL_CONTEXT_METAL_OWNERSHIP_IDENTITY_ANGLE 0x34D2
+#endif /* EGL_ANGLE_metal_create_context_ownership_identity */
+
+#ifndef EGL_ANGLE_create_context_client_arrays
+#define EGL_ANGLE_create_context_client_arrays 1
+#define EGL_CONTEXT_CLIENT_ARRAYS_ENABLED_ANGLE 0x3452
+#endif /* EGL_ANGLE_create_context_client_arrays */
+
+#ifndef EGL_ANGLE_device_creation
+#define EGL_ANGLE_device_creation 1
+typedef EGLDeviceEXT(EGLAPIENTRYP PFNEGLCREATEDEVICEANGLEPROC) (EGLint device_type, void *native_device, const EGLAttrib *attrib_list);
+typedef EGLBoolean(EGLAPIENTRYP PFNEGLRELEASEDEVICEANGLEPROC) (EGLDeviceEXT device);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLDeviceEXT EGLAPIENTRY eglCreateDeviceANGLE(EGLint device_type, void *native_device, const EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglReleaseDeviceANGLE(EGLDeviceEXT device);
+#endif
+#endif /* EGL_ANGLE_device_creation */
+
+#ifndef EGL_ANGLE_program_cache_control
+#define EGL_ANGLE_program_cache_control 1
+#define EGL_PROGRAM_CACHE_SIZE_ANGLE 0x3455
+#define EGL_PROGRAM_CACHE_KEY_LENGTH_ANGLE 0x3456
+#define EGL_PROGRAM_CACHE_RESIZE_ANGLE 0x3457
+#define EGL_PROGRAM_CACHE_TRIM_ANGLE 0x3458
+#define EGL_CONTEXT_PROGRAM_BINARY_CACHE_ENABLED_ANGLE 0x3459
+typedef EGLint (EGLAPIENTRYP PFNEGLPROGRAMCACHEGETATTRIBANGLEPROC) (EGLDisplay dpy, EGLenum attrib);
+typedef void (EGLAPIENTRYP PFNEGLPROGRAMCACHEQUERYANGLEPROC) (EGLDisplay dpy, EGLint index, void *key, EGLint *keysize, void *binary, EGLint *binarysize);
+typedef void (EGLAPIENTRYP PFNEGLPROGRAMCACHEPOPULATEANGLEPROC) (EGLDisplay dpy, const void *key, EGLint keysize, const void *binary, EGLint binarysize);
+typedef EGLint (EGLAPIENTRYP PFNEGLPROGRAMCACHERESIZEANGLEPROC) (EGLDisplay dpy, EGLint limit, EGLint mode);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLint EGLAPIENTRY eglProgramCacheGetAttribANGLE(EGLDisplay dpy, EGLenum attrib);
+EGLAPI void EGLAPIENTRY eglProgramCacheQueryANGLE(EGLDisplay dpy, EGLint index, void *key, EGLint *keysize, void *binary, EGLint *binarysize);
+EGLAPI void EGLAPIENTRY eglProgramCachePopulateANGLE(EGLDisplay dpy, const void *key, EGLint keysize, const void *binary, EGLint binarysize);
+EGLAPI EGLint EGLAPIENTRY eglProgramCacheResizeANGLE(EGLDisplay dpy, EGLint limit, EGLint mode);
+#endif
+#endif /* EGL_ANGLE_program_cache_control */
+
+#ifndef EGL_ANGLE_iosurface_client_buffer
+#define EGL_ANGLE_iosurface_client_buffer 1
+#define EGL_IOSURFACE_ANGLE 0x3454
+#define EGL_IOSURFACE_PLANE_ANGLE 0x345A
+#define EGL_TEXTURE_RECTANGLE_ANGLE 0x345B
+#define EGL_TEXTURE_TYPE_ANGLE 0x345C
+#define EGL_TEXTURE_INTERNAL_FORMAT_ANGLE 0x345D
+#define EGL_IOSURFACE_USAGE_HINT_ANGLE 0x348A
+#define EGL_IOSURFACE_READ_HINT_ANGLE 0x0001
+#define EGL_IOSURFACE_WRITE_HINT_ANGLE 0x0002
+#define EGL_BIND_TO_TEXTURE_TARGET_ANGLE 0x348D
+#endif /* EGL_ANGLE_iosurface_client_buffer */
+
+#ifndef ANGLE_metal_texture_client_buffer
+#define ANGLE_metal_texture_client_buffer 1
+#define EGL_METAL_TEXTURE_ANGLE 0x34A7
+#endif /* ANGLE_metal_texture_client_buffer */
+
+#ifndef EGL_ANGLE_create_context_extensions_enabled
+#define EGL_ANGLE_create_context_extensions_enabled 1
+#define EGL_EXTENSIONS_ENABLED_ANGLE 0x345F
+#endif /* EGL_ANGLE_create_context_extensions_enabled */
+
+#ifndef EGL_CHROMIUM_sync_control
+#define EGL_CHROMIUM_sync_control 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETSYNCVALUESCHROMIUMPROC) (EGLDisplay dpy,
+                                                             EGLSurface surface,
+                                                             EGLuint64KHR *ust,
+                                                             EGLuint64KHR *msc,
+                                                             EGLuint64KHR *sbc);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglGetSyncValuesCHROMIUM(EGLDisplay dpy,
+                                                             EGLSurface surface,
+                                                             EGLuint64KHR *ust,
+                                                             EGLuint64KHR *msc,
+                                                             EGLuint64KHR *sbc);
+#endif
+#endif /* EGL_CHROMIUM_sync_control */
+
+#ifndef EGL_ANGLE_sync_control_rate
+#define EGL_ANGLE_sync_control_rate 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETMSCRATEANGLEPROC) (EGLDisplay dpy,
+                                                             EGLSurface surface,
+                                                             EGLint *numerator,
+                                                             EGLint *denominator);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglGetMscRateANGLE(EGLDisplay dpy,
+                                                             EGLSurface surface,
+                                                             EGLint *numerator,
+                                                             EGLint *denominator);
+#endif
+#endif /* EGL_ANGLE_sync_control_rate */
+
+#ifndef EGL_ANGLE_power_preference
+#define EGL_ANGLE_power_preference 1
+#define EGL_POWER_PREFERENCE_ANGLE 0x3482
+#define EGL_LOW_POWER_ANGLE 0x0001
+#define EGL_HIGH_POWER_ANGLE 0x0002
+typedef void(EGLAPIENTRYP PFNEGLRELEASEHIGHPOWERGPUANGLEPROC) (EGLDisplay dpy, EGLContext ctx);
+typedef void(EGLAPIENTRYP PFNEGLREACQUIREHIGHPOWERGPUANGLEPROC) (EGLDisplay dpy, EGLContext ctx);
+typedef void(EGLAPIENTRYP PFNEGLHANDLEGPUSWITCHANGLEPROC) (EGLDisplay dpy);
+typedef void(EGLAPIENTRYP PFNEGLFORCEGPUSWITCHANGLEPROC) (EGLDisplay dpy, EGLint gpuIDHigh, EGLint gpuIDLow);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI void EGLAPIENTRY eglReleaseHighPowerGPUANGLE(EGLDisplay dpy, EGLContext ctx);
+EGLAPI void EGLAPIENTRY eglReacquireHighPowerGPUANGLE(EGLDisplay dpy, EGLContext ctx);
+EGLAPI void EGLAPIENTRY eglHandleGPUSwitchANGLE(EGLDisplay dpy);
+EGLAPI void EGLAPIENTRY eglForceGPUSwitchANGLE(EGLDisplay dpy, EGLint gpuIDHigh, EGLint gpuIDLow);
+#endif
+#endif /* EGL_ANGLE_power_preference */
+
+#ifndef EGL_ANGLE_wait_until_work_scheduled
+#define EGL_ANGLE_wait_until_work_scheduled 1
+typedef void(EGLAPIENTRYP PFNEGLWAITUNTILWORKSCHEDULEDANGLEPROC) (EGLDisplay dpy);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI void EGLAPIENTRY eglWaitUntilWorkScheduledANGLE(EGLDisplay dpy);
+#endif
+#endif /* EGL_ANGLE_wait_until_work_scheduled */
+
+#ifndef EGL_ANGLE_feature_control
+#define EGL_ANGLE_feature_control 1
+#define EGL_FEATURE_NAME_ANGLE 0x3460
+#define EGL_FEATURE_CATEGORY_ANGLE 0x3461
+#define EGL_FEATURE_DESCRIPTION_ANGLE 0x3462
+#define EGL_FEATURE_BUG_ANGLE 0x3463
+#define EGL_FEATURE_STATUS_ANGLE 0x3464
+#define EGL_FEATURE_COUNT_ANGLE 0x3465
+#define EGL_FEATURE_OVERRIDES_ENABLED_ANGLE 0x3466
+#define EGL_FEATURE_OVERRIDES_DISABLED_ANGLE 0x3467
+#define EGL_FEATURE_CONDITION_ANGLE 0x3468
+#define EGL_FEATURE_ALL_DISABLED_ANGLE 0x3469
+typedef const char *(EGLAPIENTRYP PFNEGLQUERYSTRINGIANGLEPROC) (EGLDisplay dpy, EGLint name, EGLint index);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLQUERYDISPLAYATTRIBANGLEPROC) (EGLDisplay dpy, EGLint attribute, EGLAttrib *value);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI const char *EGLAPIENTRY eglQueryStringiANGLE(EGLDisplay dpy, EGLint name, EGLint index);
+EGLAPI EGLBoolean EGLAPIENTRY eglQueryDisplayAttribANGLE(EGLDisplay dpy, EGLint attribute, EGLAttrib *value);
+#endif
+#endif /* EGL_ANGLE_feature_control */
+
+#ifndef EGL_ANGLE_image_d3d11_texture
+#define EGL_D3D11_TEXTURE_ANGLE 0x3484
+#define EGL_TEXTURE_INTERNAL_FORMAT_ANGLE 0x345D
+#endif /* EGL_ANGLE_image_d3d11_texture */
+
+#ifndef EGL_ANGLE_create_context_backwards_compatible
+#define EGL_ANGLE_create_context_backwards_compatible 1
+#define EGL_CONTEXT_OPENGL_BACKWARDS_COMPATIBLE_ANGLE 0x3483
+#endif /* EGL_ANGLE_create_context_backwards_compatible */
+
+#ifndef EGL_ANGLE_device_cgl
+#define EGL_ANGLE_device_cgl 1
+#define EGL_CGL_CONTEXT_ANGLE 0x3485
+#define EGL_CGL_PIXEL_FORMAT_ANGLE 0x3486
+#endif /* EGL_ANGLE_device_cgl */
+
+#ifndef EGL_ANGLE_ggp_stream_descriptor
+#define EGL_ANGLE_ggp_stream_descriptor 1
+#define EGL_GGP_STREAM_DESCRIPTOR_ANGLE 0x348B
+#endif /* EGL_ANGLE_ggp_stream_descriptor */
+
+#ifndef EGL_ANGLE_swap_with_frame_token
+#define EGL_ANGLE_swap_with_frame_token 1
+typedef khronos_uint64_t EGLFrameTokenANGLE;
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSWAPBUFFERSWITHFRAMETOKENANGLEPROC)(EGLDisplay dpy, EGLSurface surface, EGLFrameTokenANGLE frametoken);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithFrameTokenANGLE(EGLDisplay dpy, EGLSurface surface, EGLFrameTokenANGLE frametoken);
+#endif
+#endif /* EGL_ANGLE_swap_with_frame_token */
+
+#ifndef EGL_ANGLE_prepare_swap_buffers
+#define EGL_ANGLE_prepare_swap_buffers 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLPREPARESWAPBUFFERSANGLEPROC)(EGLDisplay dpy, EGLSurface surface);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglPrepareSwapBuffersANGLE(EGLDisplay dpy, EGLSurface surface);
+#endif
+#endif /* EGL_ANGLE_prepare_swap_buffers */
+
+#ifndef EGL_ANGLE_device_eagl
+#define EGL_ANGLE_device_eagl 1
+#define EGL_EAGL_CONTEXT_ANGLE 0x348C
+#endif /* EGL_ANGLE_device_eagl */
+
+#ifndef EGL_ANGLE_device_metal
+#define EGL_ANGLE_device_metal 1
+#define EGL_METAL_DEVICE_ANGLE 0x34A6
+#endif /* EGL_ANGLE_device_metal */
+
+#ifndef EGL_ANGLE_display_semaphore_share_group
+#define EGL_ANGLE_display_semaphore_share_group 1
+#define EGL_DISPLAY_SEMAPHORE_SHARE_GROUP_ANGLE 0x348D
+#endif /* EGL_ANGLE_display_semaphore_share_group */
+
+#ifndef EGL_ANGLE_external_context_and_surface
+#define EGL_ANGLE_external_context_and_surface 1
+#define EGL_EXTERNAL_CONTEXT_ANGLE 0x348E
+#define EGL_EXTERNAL_SURFACE_ANGLE 0x348F
+#define EGL_EXTERNAL_CONTEXT_SAVE_STATE_ANGLE 0x3490
+#endif /* EGL_ANGLE_external_context_and_surface */
+
+#ifndef EGL_ANGLE_create_surface_swap_interval
+#define EGL_ANGLE_create_surface_swap_interval 1
+#define EGL_SWAP_INTERVAL_ANGLE 0x322F
+#endif /* EGL_ANGLE_create_surface_swap_interval */
+
+#ifndef EGL_ANGLE_device_vulkan
+#define EGL_ANGLE_device_vulkan 1
+#define EGL_VULKAN_VERSION_ANGLE 0x34A8
+#define EGL_VULKAN_INSTANCE_ANGLE 0x34A9
+#define EGL_VULKAN_INSTANCE_EXTENSIONS_ANGLE 0x34AA
+#define EGL_VULKAN_PHYSICAL_DEVICE_ANGLE 0x34AB
+#define EGL_VULKAN_DEVICE_ANGLE 0x34AC
+#define EGL_VULKAN_DEVICE_EXTENSIONS_ANGLE 0x34AD
+#define EGL_VULKAN_FEATURES_ANGLE 0x34AE
+#define EGL_VULKAN_QUEUE_ANGLE 0x34AF
+#define EGL_VULKAN_QUEUE_FAMILIY_INDEX_ANGLE 0x34D0
+#define EGL_VULKAN_GET_INSTANCE_PROC_ADDR 0x34D1
+#endif /* EGL_ANGLE_device_vulkan */
+
+#ifndef EGL_ANGLE_vulkan_image
+#define EGL_ANGLE_vulkan_image 1
+#define EGL_VULKAN_IMAGE_ANGLE 0x34D3
+#define EGL_VULKAN_IMAGE_CREATE_INFO_HI_ANGLE 0x34D4
+#define EGL_VULKAN_IMAGE_CREATE_INFO_LO_ANGLE 0x34D5
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLEXPORTVKIMAGEANGLEPROC)(EGLDisplay dpy, EGLImage image, void* vk_image, void* vk_image_create_info);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglExportVkImageANGLE(EGLDisplay dpy, EGLImage image, void* vk_image, void* vk_image_create_info);
+#endif
+#endif /* EGL_ANGLE_vulkan_image */
+
+#ifndef EGL_ANGLE_metal_shared_event_sync
+#define EGL_ANGLE_metal_shared_event_sync 1
+#define EGL_SYNC_METAL_SHARED_EVENT_ANGLE 0x34D8
+#define EGL_SYNC_METAL_SHARED_EVENT_OBJECT_ANGLE 0x34D9
+#define EGL_SYNC_METAL_SHARED_EVENT_SIGNAL_VALUE_LO_ANGLE 0x34DA
+#define EGL_SYNC_METAL_SHARED_EVENT_SIGNAL_VALUE_HI_ANGLE 0x34DB
+#define EGL_SYNC_METAL_SHARED_EVENT_SIGNALED_ANGLE 0x34DC
+typedef void* (EGLAPIENTRYP PFNEGLCOPYMETALSHAREDEVENTANGLEPROC)(EGLDisplay dpy, EGLSync sync);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI void *EGLAPIENTRY eglCopyMetalSharedEventANGLE(EGLDisplay dpy, EGLSync sync);
+#endif
+#endif /* EGL_ANGLE_metal_shared_event_sync */
+
+// clang-format on
+
+#endif  // INCLUDE_EGL_EGLEXT_ANGLE_
diff --git a/thirdparty/angle/include/EGL/eglplatform.h b/thirdparty/angle/include/EGL/eglplatform.h
new file mode 100644
index 0000000000..777e985588
--- /dev/null
+++ b/thirdparty/angle/include/EGL/eglplatform.h
@@ -0,0 +1,175 @@
+#ifndef __eglplatform_h_
+#define __eglplatform_h_
+
+/*
+** Copyright 2007-2020 The Khronos Group Inc.
+** SPDX-License-Identifier: Apache-2.0
+*/
+
+/* Platform-specific types and definitions for egl.h
+ *
+ * Adopters may modify khrplatform.h and this file to suit their platform.
+ * You are encouraged to submit all modifications to the Khronos group so that
+ * they can be included in future versions of this file.  Please submit changes
+ * by filing an issue or pull request on the public Khronos EGL Registry, at
+ * https://www.github.com/KhronosGroup/EGL-Registry/
+ */
+
+#include <KHR/khrplatform.h>
+
+/* Macros used in EGL function prototype declarations.
+ *
+ * EGL functions should be prototyped as:
+ *
+ * EGLAPI return-type EGLAPIENTRY eglFunction(arguments);
+ * typedef return-type (EGLAPIENTRYP PFNEGLFUNCTIONPROC) (arguments);
+ *
+ * KHRONOS_APICALL and KHRONOS_APIENTRY are defined in KHR/khrplatform.h
+ */
+
+#ifndef EGLAPI
+#define EGLAPI KHRONOS_APICALL
+#endif
+
+#ifndef EGLAPIENTRY
+#define EGLAPIENTRY  KHRONOS_APIENTRY
+#endif
+#define EGLAPIENTRYP EGLAPIENTRY*
+
+/* The types NativeDisplayType, NativeWindowType, and NativePixmapType
+ * are aliases of window-system-dependent types, such as X Display * or
+ * Windows Device Context. They must be defined in platform-specific
+ * code below. The EGL-prefixed versions of Native*Type are the same
+ * types, renamed in EGL 1.3 so all types in the API start with "EGL".
+ *
+ * Khronos STRONGLY RECOMMENDS that you use the default definitions
+ * provided below, since these changes affect both binary and source
+ * portability of applications using EGL running on different EGL
+ * implementations.
+ */
+
+#if defined(EGL_NO_PLATFORM_SPECIFIC_TYPES)
+
+typedef void *EGLNativeDisplayType;
+typedef void *EGLNativePixmapType;
+typedef void *EGLNativeWindowType;
+
+#elif defined(_WIN32) || defined(__VC32__) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) /* Win32 and WinCE */
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
+#endif
+#include <windows.h>
+
+typedef HDC     EGLNativeDisplayType;
+typedef HBITMAP EGLNativePixmapType;
+
+#if !defined(WINAPI_FAMILY) || (WINAPI_FAMILY == WINAPI_FAMILY_DESKTOP_APP) /* Windows Desktop */
+typedef HWND    EGLNativeWindowType;
+#else /* Windows Store */
+#include <inspectable.h>
+typedef IInspectable* EGLNativeWindowType;
+#endif
+
+#elif defined(__EMSCRIPTEN__)
+
+typedef int EGLNativeDisplayType;
+typedef int EGLNativePixmapType;
+typedef int EGLNativeWindowType;
+
+#elif defined(__WINSCW__) || defined(__SYMBIAN32__)  /* Symbian */
+
+typedef int   EGLNativeDisplayType;
+typedef void *EGLNativePixmapType;
+typedef void *EGLNativeWindowType;
+
+#elif defined(WL_EGL_PLATFORM)
+
+typedef struct wl_display     *EGLNativeDisplayType;
+typedef struct wl_egl_pixmap  *EGLNativePixmapType;
+typedef struct wl_egl_window  *EGLNativeWindowType;
+
+#elif defined(__GBM__)
+
+typedef struct gbm_device  *EGLNativeDisplayType;
+typedef struct gbm_bo      *EGLNativePixmapType;
+typedef void               *EGLNativeWindowType;
+
+#elif defined(__ANDROID__) || defined(ANDROID)
+
+struct ANativeWindow;
+struct egl_native_pixmap_t;
+
+typedef void*                           EGLNativeDisplayType;
+typedef struct egl_native_pixmap_t*     EGLNativePixmapType;
+typedef struct ANativeWindow*           EGLNativeWindowType;
+
+#elif defined(USE_OZONE)
+
+typedef intptr_t EGLNativeDisplayType;
+typedef intptr_t EGLNativePixmapType;
+typedef intptr_t EGLNativeWindowType;
+
+#elif defined(USE_X11)
+
+/* X11 (tentative)  */
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+
+typedef Display *EGLNativeDisplayType;
+typedef Pixmap   EGLNativePixmapType;
+typedef Window   EGLNativeWindowType;
+
+#elif defined(__unix__)
+
+typedef void             *EGLNativeDisplayType;
+typedef khronos_uintptr_t EGLNativePixmapType;
+typedef khronos_uintptr_t EGLNativeWindowType;
+
+#elif defined(__APPLE__)
+
+typedef int   EGLNativeDisplayType;
+typedef void *EGLNativePixmapType;
+typedef void *EGLNativeWindowType;
+
+#elif defined(__HAIKU__)
+
+#include <kernel/image.h>
+
+typedef void              *EGLNativeDisplayType;
+typedef khronos_uintptr_t  EGLNativePixmapType;
+typedef khronos_uintptr_t  EGLNativeWindowType;
+
+#elif defined(__Fuchsia__)
+
+typedef void              *EGLNativeDisplayType;
+typedef khronos_uintptr_t  EGLNativePixmapType;
+typedef khronos_uintptr_t  EGLNativeWindowType;
+
+#else
+#error "Platform not recognized"
+#endif
+
+/* EGL 1.2 types, renamed for consistency in EGL 1.3 */
+typedef EGLNativeDisplayType NativeDisplayType;
+typedef EGLNativePixmapType  NativePixmapType;
+typedef EGLNativeWindowType  NativeWindowType;
+
+
+/* Define EGLint. This must be a signed integral type large enough to contain
+ * all legal attribute names and values passed into and out of EGL, whether
+ * their type is boolean, bitmask, enumerant (symbolic constant), integer,
+ * handle, or other.  While in general a 32-bit integer will suffice, if
+ * handles are 64 bit types, then EGLint should be defined as a signed 64-bit
+ * integer type.
+ */
+typedef khronos_int32_t EGLint;
+
+
+/* C++ / C typecast macros for special EGL handle values */
+#if defined(__cplusplus)
+#define EGL_CAST(type, value) (static_cast<type>(value))
+#else
+#define EGL_CAST(type, value) ((type) (value))
+#endif
+
+#endif /* __eglplatform_h_ */
diff --git a/thirdparty/angle/include/KHR/khrplatform.h b/thirdparty/angle/include/KHR/khrplatform.h
new file mode 100644
index 0000000000..dd22d92701
--- /dev/null
+++ b/thirdparty/angle/include/KHR/khrplatform.h
@@ -0,0 +1,290 @@
+#ifndef __khrplatform_h_
+#define __khrplatform_h_
+
+/*
+** Copyright (c) 2008-2018 The Khronos Group Inc.
+**
+** Permission is hereby granted, free of charge, to any person obtaining a
+** copy of this software and/or associated documentation files (the
+** "Materials"), to deal in the Materials without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Materials, and to
+** permit persons to whom the Materials are furnished to do so, subject to
+** the following conditions:
+**
+** The above copyright notice and this permission notice shall be included
+** in all copies or substantial portions of the Materials.
+**
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+*/
+
+/* Khronos platform-specific types and definitions.
+ *
+ * The master copy of khrplatform.h is maintained in the Khronos EGL
+ * Registry repository at https://github.com/KhronosGroup/EGL-Registry
+ * The last semantic modification to khrplatform.h was at commit ID:
+ *      67a3e0864c2d75ea5287b9f3d2eb74a745936692
+ *
+ * Adopters may modify this file to suit their platform. Adopters are
+ * encouraged to submit platform specific modifications to the Khronos
+ * group so that they can be included in future versions of this file.
+ * Please submit changes by filing pull requests or issues on
+ * the EGL Registry repository linked above.
+ *
+ *
+ * See the Implementer's Guidelines for information about where this file
+ * should be located on your system and for more details of its use:
+ *    http://www.khronos.org/registry/implementers_guide.pdf
+ *
+ * This file should be included as
+ *        #include <KHR/khrplatform.h>
+ * by Khronos client API header files that use its types and defines.
+ *
+ * The types in khrplatform.h should only be used to define API-specific types.
+ *
+ * Types defined in khrplatform.h:
+ *    khronos_int8_t              signed   8  bit
+ *    khronos_uint8_t             unsigned 8  bit
+ *    khronos_int16_t             signed   16 bit
+ *    khronos_uint16_t            unsigned 16 bit
+ *    khronos_int32_t             signed   32 bit
+ *    khronos_uint32_t            unsigned 32 bit
+ *    khronos_int64_t             signed   64 bit
+ *    khronos_uint64_t            unsigned 64 bit
+ *    khronos_intptr_t            signed   same number of bits as a pointer
+ *    khronos_uintptr_t           unsigned same number of bits as a pointer
+ *    khronos_ssize_t             signed   size
+ *    khronos_usize_t             unsigned size
+ *    khronos_float_t             signed   32 bit floating point
+ *    khronos_time_ns_t           unsigned 64 bit time in nanoseconds
+ *    khronos_utime_nanoseconds_t unsigned time interval or absolute time in
+ *                                         nanoseconds
+ *    khronos_stime_nanoseconds_t signed time interval in nanoseconds
+ *    khronos_boolean_enum_t      enumerated boolean type. This should
+ *      only be used as a base type when a client API's boolean type is
+ *      an enum. Client APIs which use an integer or other type for
+ *      booleans cannot use this as the base type for their boolean.
+ *
+ * Tokens defined in khrplatform.h:
+ *
+ *    KHRONOS_FALSE, KHRONOS_TRUE Enumerated boolean false/true values.
+ *
+ *    KHRONOS_SUPPORT_INT64 is 1 if 64 bit integers are supported; otherwise 0.
+ *    KHRONOS_SUPPORT_FLOAT is 1 if floats are supported; otherwise 0.
+ *
+ * Calling convention macros defined in this file:
+ *    KHRONOS_APICALL
+ *    KHRONOS_APIENTRY
+ *    KHRONOS_APIATTRIBUTES
+ *
+ * These may be used in function prototypes as:
+ *
+ *      KHRONOS_APICALL void KHRONOS_APIENTRY funcname(
+ *                                  int arg1,
+ *                                  int arg2) KHRONOS_APIATTRIBUTES;
+ */
+
+#if defined(__SCITECH_SNAP__) && !defined(KHRONOS_STATIC)
+#   define KHRONOS_STATIC 1
+#endif
+
+/*-------------------------------------------------------------------------
+ * Definition of KHRONOS_APICALL
+ *-------------------------------------------------------------------------
+ * This precedes the return type of the function in the function prototype.
+ */
+#if defined(KHRONOS_STATIC)
+    /* If the preprocessor constant KHRONOS_STATIC is defined, make the
+     * header compatible with static linking. */
+#   define KHRONOS_APICALL
+#elif defined(_WIN32)
+#   define KHRONOS_APICALL __declspec(dllimport)
+#elif defined (__SYMBIAN32__)
+#   define KHRONOS_APICALL IMPORT_C
+#elif defined(__ANDROID__)
+#   define KHRONOS_APICALL __attribute__((visibility("default")))
+#else
+#   define KHRONOS_APICALL
+#endif
+
+/*-------------------------------------------------------------------------
+ * Definition of KHRONOS_APIENTRY
+ *-------------------------------------------------------------------------
+ * This follows the return type of the function  and precedes the function
+ * name in the function prototype.
+ */
+#if defined(_WIN32) && !defined(_WIN32_WCE) && !defined(__SCITECH_SNAP__)
+    /* Win32 but not WinCE */
+#   define KHRONOS_APIENTRY __stdcall
+#else
+#   define KHRONOS_APIENTRY
+#endif
+
+/*-------------------------------------------------------------------------
+ * Definition of KHRONOS_APIATTRIBUTES
+ *-------------------------------------------------------------------------
+ * This follows the closing parenthesis of the function prototype arguments.
+ */
+#if defined (__ARMCC_2__)
+#define KHRONOS_APIATTRIBUTES __softfp
+#else
+#define KHRONOS_APIATTRIBUTES
+#endif
+
+/*-------------------------------------------------------------------------
+ * basic type definitions
+ *-----------------------------------------------------------------------*/
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || defined(__GNUC__) || defined(__SCO__) || defined(__USLC__)
+
+
+/*
+ * Using <stdint.h>
+ */
+#include <stdint.h>
+typedef int32_t                 khronos_int32_t;
+typedef uint32_t                khronos_uint32_t;
+typedef int64_t                 khronos_int64_t;
+typedef uint64_t                khronos_uint64_t;
+#define KHRONOS_SUPPORT_INT64   1
+#define KHRONOS_SUPPORT_FLOAT   1
+
+#elif defined(__VMS ) || defined(__sgi)
+
+/*
+ * Using <inttypes.h>
+ */
+#include <inttypes.h>
+typedef int32_t                 khronos_int32_t;
+typedef uint32_t                khronos_uint32_t;
+typedef int64_t                 khronos_int64_t;
+typedef uint64_t                khronos_uint64_t;
+#define KHRONOS_SUPPORT_INT64   1
+#define KHRONOS_SUPPORT_FLOAT   1
+
+#elif defined(_WIN32) && !defined(__SCITECH_SNAP__)
+
+/*
+ * Win32
+ */
+typedef __int32                 khronos_int32_t;
+typedef unsigned __int32        khronos_uint32_t;
+typedef __int64                 khronos_int64_t;
+typedef unsigned __int64        khronos_uint64_t;
+#define KHRONOS_SUPPORT_INT64   1
+#define KHRONOS_SUPPORT_FLOAT   1
+
+#elif defined(__sun__) || defined(__digital__)
+
+/*
+ * Sun or Digital
+ */
+typedef int                     khronos_int32_t;
+typedef unsigned int            khronos_uint32_t;
+#if defined(__arch64__) || defined(_LP64)
+typedef long int                khronos_int64_t;
+typedef unsigned long int       khronos_uint64_t;
+#else
+typedef long long int           khronos_int64_t;
+typedef unsigned long long int  khronos_uint64_t;
+#endif /* __arch64__ */
+#define KHRONOS_SUPPORT_INT64   1
+#define KHRONOS_SUPPORT_FLOAT   1
+
+#elif 0
+
+/*
+ * Hypothetical platform with no float or int64 support
+ */
+typedef int                     khronos_int32_t;
+typedef unsigned int            khronos_uint32_t;
+#define KHRONOS_SUPPORT_INT64   0
+#define KHRONOS_SUPPORT_FLOAT   0
+
+#else
+
+/*
+ * Generic fallback
+ */
+#include <stdint.h>
+typedef int32_t                 khronos_int32_t;
+typedef uint32_t                khronos_uint32_t;
+typedef int64_t                 khronos_int64_t;
+typedef uint64_t                khronos_uint64_t;
+#define KHRONOS_SUPPORT_INT64   1
+#define KHRONOS_SUPPORT_FLOAT   1
+
+#endif
+
+
+/*
+ * Types that are (so far) the same on all platforms
+ */
+typedef signed   char          khronos_int8_t;
+typedef unsigned char          khronos_uint8_t;
+typedef signed   short int     khronos_int16_t;
+typedef unsigned short int     khronos_uint16_t;
+
+/*
+ * Types that differ between LLP64 and LP64 architectures - in LLP64,
+ * pointers are 64 bits, but 'long' is still 32 bits. Win64 appears
+ * to be the only LLP64 architecture in current use.
+ */
+#ifdef _WIN64
+typedef signed   long long int khronos_intptr_t;
+typedef unsigned long long int khronos_uintptr_t;
+typedef signed   long long int khronos_ssize_t;
+typedef unsigned long long int khronos_usize_t;
+#else
+typedef signed   long  int     khronos_intptr_t;
+typedef unsigned long  int     khronos_uintptr_t;
+typedef signed   long  int     khronos_ssize_t;
+typedef unsigned long  int     khronos_usize_t;
+#endif
+
+#if KHRONOS_SUPPORT_FLOAT
+/*
+ * Float type
+ */
+typedef          float         khronos_float_t;
+#endif
+
+#if KHRONOS_SUPPORT_INT64
+/* Time types
+ *
+ * These types can be used to represent a time interval in nanoseconds or
+ * an absolute Unadjusted System Time.  Unadjusted System Time is the number
+ * of nanoseconds since some arbitrary system event (e.g. since the last
+ * time the system booted).  The Unadjusted System Time is an unsigned
+ * 64 bit value that wraps back to 0 every 584 years.  Time intervals
+ * may be either signed or unsigned.
+ */
+typedef khronos_uint64_t       khronos_utime_nanoseconds_t;
+typedef khronos_int64_t        khronos_stime_nanoseconds_t;
+#endif
+
+/*
+ * Dummy value used to pad enum types to 32 bits.
+ */
+#ifndef KHRONOS_MAX_ENUM
+#define KHRONOS_MAX_ENUM 0x7FFFFFFF
+#endif
+
+/*
+ * Enumerated boolean type
+ *
+ * Values other than zero should be considered to be true.  Therefore
+ * comparisons should not be made against KHRONOS_TRUE.
+ */
+typedef enum {
+    KHRONOS_FALSE = 0,
+    KHRONOS_TRUE  = 1,
+    KHRONOS_BOOLEAN_ENUM_FORCE_SIZE = KHRONOS_MAX_ENUM
+} khronos_boolean_enum_t;
+
+#endif /* __khrplatform_h_ */
diff --git a/thirdparty/glad/EGL/eglplatform.h b/thirdparty/glad/EGL/eglplatform.h
new file mode 100644
index 0000000000..99362a23de
--- /dev/null
+++ b/thirdparty/glad/EGL/eglplatform.h
@@ -0,0 +1,169 @@
+#ifndef __eglplatform_h_
+#define __eglplatform_h_
+
+/*
+** Copyright 2007-2020 The Khronos Group Inc.
+** SPDX-License-Identifier: Apache-2.0
+*/
+
+/* Platform-specific types and definitions for egl.h
+ *
+ * Adopters may modify khrplatform.h and this file to suit their platform.
+ * You are encouraged to submit all modifications to the Khronos group so that
+ * they can be included in future versions of this file.  Please submit changes
+ * by filing an issue or pull request on the public Khronos EGL Registry, at
+ * https://www.github.com/KhronosGroup/EGL-Registry/
+ */
+
+#include <KHR/khrplatform.h>
+
+/* Macros used in EGL function prototype declarations.
+ *
+ * EGL functions should be prototyped as:
+ *
+ * EGLAPI return-type EGLAPIENTRY eglFunction(arguments);
+ * typedef return-type (EGLAPIENTRYP PFNEGLFUNCTIONPROC) (arguments);
+ *
+ * KHRONOS_APICALL and KHRONOS_APIENTRY are defined in KHR/khrplatform.h
+ */
+
+#ifndef EGLAPI
+#define EGLAPI KHRONOS_APICALL
+#endif
+
+#ifndef EGLAPIENTRY
+#define EGLAPIENTRY  KHRONOS_APIENTRY
+#endif
+#define EGLAPIENTRYP EGLAPIENTRY*
+
+/* The types NativeDisplayType, NativeWindowType, and NativePixmapType
+ * are aliases of window-system-dependent types, such as X Display * or
+ * Windows Device Context. They must be defined in platform-specific
+ * code below. The EGL-prefixed versions of Native*Type are the same
+ * types, renamed in EGL 1.3 so all types in the API start with "EGL".
+ *
+ * Khronos STRONGLY RECOMMENDS that you use the default definitions
+ * provided below, since these changes affect both binary and source
+ * portability of applications using EGL running on different EGL
+ * implementations.
+ */
+
+#if defined(EGL_NO_PLATFORM_SPECIFIC_TYPES)
+
+typedef void *EGLNativeDisplayType;
+typedef void *EGLNativePixmapType;
+typedef void *EGLNativeWindowType;
+
+#elif defined(_WIN32) || defined(__VC32__) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) /* Win32 and WinCE */
+#ifndef WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN 1
+#endif
+#include <windows.h>
+
+typedef HDC     EGLNativeDisplayType;
+typedef HBITMAP EGLNativePixmapType;
+typedef HWND    EGLNativeWindowType;
+
+#elif defined(__EMSCRIPTEN__)
+
+typedef int EGLNativeDisplayType;
+typedef int EGLNativePixmapType;
+typedef int EGLNativeWindowType;
+
+#elif defined(__WINSCW__) || defined(__SYMBIAN32__)  /* Symbian */
+
+typedef int   EGLNativeDisplayType;
+typedef void *EGLNativePixmapType;
+typedef void *EGLNativeWindowType;
+
+#elif defined(WL_EGL_PLATFORM)
+
+typedef struct wl_display     *EGLNativeDisplayType;
+typedef struct wl_egl_pixmap  *EGLNativePixmapType;
+typedef struct wl_egl_window  *EGLNativeWindowType;
+
+#elif defined(__GBM__)
+
+typedef struct gbm_device  *EGLNativeDisplayType;
+typedef struct gbm_bo      *EGLNativePixmapType;
+typedef void               *EGLNativeWindowType;
+
+#elif defined(__ANDROID__) || defined(ANDROID)
+
+struct ANativeWindow;
+struct egl_native_pixmap_t;
+
+typedef void*                           EGLNativeDisplayType;
+typedef struct egl_native_pixmap_t*     EGLNativePixmapType;
+typedef struct ANativeWindow*           EGLNativeWindowType;
+
+#elif defined(USE_OZONE)
+
+typedef intptr_t EGLNativeDisplayType;
+typedef intptr_t EGLNativePixmapType;
+typedef intptr_t EGLNativeWindowType;
+
+#elif defined(USE_X11)
+
+/* X11 (tentative)  */
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+
+typedef Display *EGLNativeDisplayType;
+typedef Pixmap   EGLNativePixmapType;
+typedef Window   EGLNativeWindowType;
+
+#elif defined(__unix__)
+
+typedef void             *EGLNativeDisplayType;
+typedef khronos_uintptr_t EGLNativePixmapType;
+typedef khronos_uintptr_t EGLNativeWindowType;
+
+#elif defined(__APPLE__)
+
+typedef int   EGLNativeDisplayType;
+typedef void *EGLNativePixmapType;
+typedef void *EGLNativeWindowType;
+
+#elif defined(__HAIKU__)
+
+#include <kernel/image.h>
+
+typedef void              *EGLNativeDisplayType;
+typedef khronos_uintptr_t  EGLNativePixmapType;
+typedef khronos_uintptr_t  EGLNativeWindowType;
+
+#elif defined(__Fuchsia__)
+
+typedef void              *EGLNativeDisplayType;
+typedef khronos_uintptr_t  EGLNativePixmapType;
+typedef khronos_uintptr_t  EGLNativeWindowType;
+
+#else
+#error "Platform not recognized"
+#endif
+
+/* EGL 1.2 types, renamed for consistency in EGL 1.3 */
+typedef EGLNativeDisplayType NativeDisplayType;
+typedef EGLNativePixmapType  NativePixmapType;
+typedef EGLNativeWindowType  NativeWindowType;
+
+
+/* Define EGLint. This must be a signed integral type large enough to contain
+ * all legal attribute names and values passed into and out of EGL, whether
+ * their type is boolean, bitmask, enumerant (symbolic constant), integer,
+ * handle, or other.  While in general a 32-bit integer will suffice, if
+ * handles are 64 bit types, then EGLint should be defined as a signed 64-bit
+ * integer type.
+ */
+typedef khronos_int32_t EGLint;
+
+
+/* C++ / C typecast macros for special EGL handle values */
+#if defined(__cplusplus)
+#define EGL_CAST(type, value) (static_cast<type>(value))
+#else
+#define EGL_CAST(type, value) ((type) (value))
+#endif
+
+#endif /* __eglplatform_h_ */
diff --git a/thirdparty/glad/egl.c b/thirdparty/glad/egl.c
new file mode 100644
index 0000000000..e120ea6b2c
--- /dev/null
+++ b/thirdparty/glad/egl.c
@@ -0,0 +1,408 @@
+/**
+ * SPDX-License-Identifier: (WTFPL OR CC0-1.0) AND Apache-2.0
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <glad/egl.h>
+
+#ifndef GLAD_IMPL_UTIL_C_
+#define GLAD_IMPL_UTIL_C_
+
+#ifdef _MSC_VER
+#define GLAD_IMPL_UTIL_SSCANF sscanf_s
+#else
+#define GLAD_IMPL_UTIL_SSCANF sscanf
+#endif
+
+#endif /* GLAD_IMPL_UTIL_C_ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+
+int GLAD_EGL_VERSION_1_0 = 0;
+int GLAD_EGL_VERSION_1_1 = 0;
+int GLAD_EGL_VERSION_1_2 = 0;
+int GLAD_EGL_VERSION_1_3 = 0;
+int GLAD_EGL_VERSION_1_4 = 0;
+int GLAD_EGL_VERSION_1_5 = 0;
+int GLAD_EGL_ANDROID_blob_cache = 0;
+int GLAD_EGL_KHR_platform_wayland = 0;
+int GLAD_EGL_KHR_platform_x11 = 0;
+
+
+
+PFNEGLBINDAPIPROC glad_eglBindAPI = NULL;
+PFNEGLBINDTEXIMAGEPROC glad_eglBindTexImage = NULL;
+PFNEGLCHOOSECONFIGPROC glad_eglChooseConfig = NULL;
+PFNEGLCLIENTWAITSYNCPROC glad_eglClientWaitSync = NULL;
+PFNEGLCOPYBUFFERSPROC glad_eglCopyBuffers = NULL;
+PFNEGLCREATECONTEXTPROC glad_eglCreateContext = NULL;
+PFNEGLCREATEIMAGEPROC glad_eglCreateImage = NULL;
+PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC glad_eglCreatePbufferFromClientBuffer = NULL;
+PFNEGLCREATEPBUFFERSURFACEPROC glad_eglCreatePbufferSurface = NULL;
+PFNEGLCREATEPIXMAPSURFACEPROC glad_eglCreatePixmapSurface = NULL;
+PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC glad_eglCreatePlatformPixmapSurface = NULL;
+PFNEGLCREATEPLATFORMWINDOWSURFACEPROC glad_eglCreatePlatformWindowSurface = NULL;
+PFNEGLCREATESYNCPROC glad_eglCreateSync = NULL;
+PFNEGLCREATEWINDOWSURFACEPROC glad_eglCreateWindowSurface = NULL;
+PFNEGLDESTROYCONTEXTPROC glad_eglDestroyContext = NULL;
+PFNEGLDESTROYIMAGEPROC glad_eglDestroyImage = NULL;
+PFNEGLDESTROYSURFACEPROC glad_eglDestroySurface = NULL;
+PFNEGLDESTROYSYNCPROC glad_eglDestroySync = NULL;
+PFNEGLGETCONFIGATTRIBPROC glad_eglGetConfigAttrib = NULL;
+PFNEGLGETCONFIGSPROC glad_eglGetConfigs = NULL;
+PFNEGLGETCURRENTCONTEXTPROC glad_eglGetCurrentContext = NULL;
+PFNEGLGETCURRENTDISPLAYPROC glad_eglGetCurrentDisplay = NULL;
+PFNEGLGETCURRENTSURFACEPROC glad_eglGetCurrentSurface = NULL;
+PFNEGLGETDISPLAYPROC glad_eglGetDisplay = NULL;
+PFNEGLGETERRORPROC glad_eglGetError = NULL;
+PFNEGLGETPLATFORMDISPLAYPROC glad_eglGetPlatformDisplay = NULL;
+PFNEGLGETPROCADDRESSPROC glad_eglGetProcAddress = NULL;
+PFNEGLGETSYNCATTRIBPROC glad_eglGetSyncAttrib = NULL;
+PFNEGLINITIALIZEPROC glad_eglInitialize = NULL;
+PFNEGLMAKECURRENTPROC glad_eglMakeCurrent = NULL;
+PFNEGLQUERYAPIPROC glad_eglQueryAPI = NULL;
+PFNEGLQUERYCONTEXTPROC glad_eglQueryContext = NULL;
+PFNEGLQUERYSTRINGPROC glad_eglQueryString = NULL;
+PFNEGLQUERYSURFACEPROC glad_eglQuerySurface = NULL;
+PFNEGLRELEASETEXIMAGEPROC glad_eglReleaseTexImage = NULL;
+PFNEGLRELEASETHREADPROC glad_eglReleaseThread = NULL;
+PFNEGLSETBLOBCACHEFUNCSANDROIDPROC glad_eglSetBlobCacheFuncsANDROID = NULL;
+PFNEGLSURFACEATTRIBPROC glad_eglSurfaceAttrib = NULL;
+PFNEGLSWAPBUFFERSPROC glad_eglSwapBuffers = NULL;
+PFNEGLSWAPINTERVALPROC glad_eglSwapInterval = NULL;
+PFNEGLTERMINATEPROC glad_eglTerminate = NULL;
+PFNEGLWAITCLIENTPROC glad_eglWaitClient = NULL;
+PFNEGLWAITGLPROC glad_eglWaitGL = NULL;
+PFNEGLWAITNATIVEPROC glad_eglWaitNative = NULL;
+PFNEGLWAITSYNCPROC glad_eglWaitSync = NULL;
+
+
+static void glad_egl_load_EGL_VERSION_1_0( GLADuserptrloadfunc load, void* userptr) { /* Fetch the EGL 1.0 entry points via load(userptr, name). */
+    if(!GLAD_EGL_VERSION_1_0) return; /* flag is 0: leave the function pointers untouched */
+    glad_eglChooseConfig = (PFNEGLCHOOSECONFIGPROC) load(userptr, "eglChooseConfig");
+    glad_eglCopyBuffers = (PFNEGLCOPYBUFFERSPROC) load(userptr, "eglCopyBuffers");
+    glad_eglCreateContext = (PFNEGLCREATECONTEXTPROC) load(userptr, "eglCreateContext");
+    glad_eglCreatePbufferSurface = (PFNEGLCREATEPBUFFERSURFACEPROC) load(userptr, "eglCreatePbufferSurface");
+    glad_eglCreatePixmapSurface = (PFNEGLCREATEPIXMAPSURFACEPROC) load(userptr, "eglCreatePixmapSurface");
+    glad_eglCreateWindowSurface = (PFNEGLCREATEWINDOWSURFACEPROC) load(userptr, "eglCreateWindowSurface");
+    glad_eglDestroyContext = (PFNEGLDESTROYCONTEXTPROC) load(userptr, "eglDestroyContext");
+    glad_eglDestroySurface = (PFNEGLDESTROYSURFACEPROC) load(userptr, "eglDestroySurface");
+    glad_eglGetConfigAttrib = (PFNEGLGETCONFIGATTRIBPROC) load(userptr, "eglGetConfigAttrib");
+    glad_eglGetConfigs = (PFNEGLGETCONFIGSPROC) load(userptr, "eglGetConfigs");
+    glad_eglGetCurrentDisplay = (PFNEGLGETCURRENTDISPLAYPROC) load(userptr, "eglGetCurrentDisplay");
+    glad_eglGetCurrentSurface = (PFNEGLGETCURRENTSURFACEPROC) load(userptr, "eglGetCurrentSurface");
+    glad_eglGetDisplay = (PFNEGLGETDISPLAYPROC) load(userptr, "eglGetDisplay");
+    glad_eglGetError = (PFNEGLGETERRORPROC) load(userptr, "eglGetError");
+    glad_eglGetProcAddress = (PFNEGLGETPROCADDRESSPROC) load(userptr, "eglGetProcAddress");
+    glad_eglInitialize = (PFNEGLINITIALIZEPROC) load(userptr, "eglInitialize");
+    glad_eglMakeCurrent = (PFNEGLMAKECURRENTPROC) load(userptr, "eglMakeCurrent");
+    glad_eglQueryContext = (PFNEGLQUERYCONTEXTPROC) load(userptr, "eglQueryContext");
+    glad_eglQueryString = (PFNEGLQUERYSTRINGPROC) load(userptr, "eglQueryString");
+    glad_eglQuerySurface = (PFNEGLQUERYSURFACEPROC) load(userptr, "eglQuerySurface");
+    glad_eglSwapBuffers = (PFNEGLSWAPBUFFERSPROC) load(userptr, "eglSwapBuffers");
+    glad_eglTerminate = (PFNEGLTERMINATEPROC) load(userptr, "eglTerminate");
+    glad_eglWaitGL = (PFNEGLWAITGLPROC) load(userptr, "eglWaitGL");
+    glad_eglWaitNative = (PFNEGLWAITNATIVEPROC) load(userptr, "eglWaitNative");
+}
+static void glad_egl_load_EGL_VERSION_1_1( GLADuserptrloadfunc load, void* userptr) { /* Fetch the EGL 1.1 entry points via load(userptr, name). */
+    if(!GLAD_EGL_VERSION_1_1) return; /* flag is 0: leave the function pointers untouched */
+    glad_eglBindTexImage = (PFNEGLBINDTEXIMAGEPROC) load(userptr, "eglBindTexImage");
+    glad_eglReleaseTexImage = (PFNEGLRELEASETEXIMAGEPROC) load(userptr, "eglReleaseTexImage");
+    glad_eglSurfaceAttrib = (PFNEGLSURFACEATTRIBPROC) load(userptr, "eglSurfaceAttrib");
+    glad_eglSwapInterval = (PFNEGLSWAPINTERVALPROC) load(userptr, "eglSwapInterval");
+}
+static void glad_egl_load_EGL_VERSION_1_2( GLADuserptrloadfunc load, void* userptr) { /* Fetch the EGL 1.2 entry points via load(userptr, name). */
+    if(!GLAD_EGL_VERSION_1_2) return; /* flag is 0: leave the function pointers untouched */
+    glad_eglBindAPI = (PFNEGLBINDAPIPROC) load(userptr, "eglBindAPI");
+    glad_eglCreatePbufferFromClientBuffer = (PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC) load(userptr, "eglCreatePbufferFromClientBuffer");
+    glad_eglQueryAPI = (PFNEGLQUERYAPIPROC) load(userptr, "eglQueryAPI");
+    glad_eglReleaseThread = (PFNEGLRELEASETHREADPROC) load(userptr, "eglReleaseThread");
+    glad_eglWaitClient = (PFNEGLWAITCLIENTPROC) load(userptr, "eglWaitClient");
+}
+static void glad_egl_load_EGL_VERSION_1_4( GLADuserptrloadfunc load, void* userptr) { /* Fetch the EGL 1.4 entry point via load(userptr, name). */
+    if(!GLAD_EGL_VERSION_1_4) return; /* flag is 0: leave the function pointer untouched */
+    glad_eglGetCurrentContext = (PFNEGLGETCURRENTCONTEXTPROC) load(userptr, "eglGetCurrentContext");
+}
+static void glad_egl_load_EGL_VERSION_1_5( GLADuserptrloadfunc load, void* userptr) { /* Fetch the EGL 1.5 entry points via load(userptr, name). */
+    if(!GLAD_EGL_VERSION_1_5) return; /* flag is 0: leave the function pointers untouched */
+    glad_eglClientWaitSync = (PFNEGLCLIENTWAITSYNCPROC) load(userptr, "eglClientWaitSync");
+    glad_eglCreateImage = (PFNEGLCREATEIMAGEPROC) load(userptr, "eglCreateImage");
+    glad_eglCreatePlatformPixmapSurface = (PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC) load(userptr, "eglCreatePlatformPixmapSurface");
+    glad_eglCreatePlatformWindowSurface = (PFNEGLCREATEPLATFORMWINDOWSURFACEPROC) load(userptr, "eglCreatePlatformWindowSurface");
+    glad_eglCreateSync = (PFNEGLCREATESYNCPROC) load(userptr, "eglCreateSync");
+    glad_eglDestroyImage = (PFNEGLDESTROYIMAGEPROC) load(userptr, "eglDestroyImage");
+    glad_eglDestroySync = (PFNEGLDESTROYSYNCPROC) load(userptr, "eglDestroySync");
+    glad_eglGetPlatformDisplay = (PFNEGLGETPLATFORMDISPLAYPROC) load(userptr, "eglGetPlatformDisplay");
+    glad_eglGetSyncAttrib = (PFNEGLGETSYNCATTRIBPROC) load(userptr, "eglGetSyncAttrib");
+    glad_eglWaitSync = (PFNEGLWAITSYNCPROC) load(userptr, "eglWaitSync");
+}
+static void glad_egl_load_EGL_ANDROID_blob_cache( GLADuserptrloadfunc load, void* userptr) { /* Fetch the EGL_ANDROID_blob_cache entry point via load(userptr, name). */
+    if(!GLAD_EGL_ANDROID_blob_cache) return; /* flag is 0: leave the function pointer untouched */
+    glad_eglSetBlobCacheFuncsANDROID = (PFNEGLSETBLOBCACHEFUNCSANDROIDPROC) load(userptr, "eglSetBlobCacheFuncsANDROID");
+}
+
+
+
+static int glad_egl_get_extensions(EGLDisplay display, const char **extensions) { /* Store eglQueryString(display, EGL_EXTENSIONS) in *extensions. */
+    *extensions = eglQueryString(display, EGL_EXTENSIONS);
+    /* NOTE(review): this tests the out-parameter itself, not *extensions, so a NULL query result is still reported as success - confirm intent. */
+    return extensions != NULL;
+}
+
+static int glad_egl_has_extension(const char *extensions, const char *ext) { /* Return 1 if ext occurs as a space-delimited entry of extensions, else 0. */
+    const char *loc; /* start of the current candidate match */
+    const char *terminator; /* first character after the candidate match */
+    if(extensions == NULL) {
+        return 0;
+    }
+    while(1) {
+        loc = strstr(extensions, ext);
+        if(loc == NULL) {
+            return 0;
+        }
+        terminator = loc + strlen(ext);
+        if((loc == extensions || *(loc - 1) == ' ') && /* match begins at the search start or right after a space... */
+            (*terminator == ' ' || *terminator == '\0')) { /* ...and ends at a space or at the end of the string */
+            return 1;
+        }
+        extensions = terminator; /* only a substring hit: resume the search just past it */
+    }
+}
+
+static GLADapiproc glad_egl_get_proc_from_userptr(void *userptr, const char *name) { /* Adapter: userptr carries a loader function pointer taking only the symbol name. */
+    return (GLAD_GNUC_EXTENSION (GLADapiproc (*)(const char *name)) userptr)(name); /* cast userptr back to that function type and call it */
+}
+
+static int glad_egl_find_extensions_egl(EGLDisplay display) { /* Refresh the GLAD_EGL_* extension flags for display; returns 0 only if the query helper fails. */
+    const char *extensions;
+    if (!glad_egl_get_extensions(display, &extensions)) return 0;
+    /* Each flag becomes 1 when the extension name is listed in the queried string, else 0. */
+    GLAD_EGL_ANDROID_blob_cache = glad_egl_has_extension(extensions, "EGL_ANDROID_blob_cache");
+    GLAD_EGL_KHR_platform_wayland = glad_egl_has_extension(extensions, "EGL_KHR_platform_wayland");
+    GLAD_EGL_KHR_platform_x11 = glad_egl_has_extension(extensions, "EGL_KHR_platform_x11");
+
+    return 1;
+}
+
+static int glad_egl_find_core_egl(EGLDisplay display) { /* Detect the EGL version reported for display and set the GLAD_EGL_VERSION_* flags. */
+    int major, minor;
+    const char *version;
+    /* Returns GLAD_MAKE_VERSION(major, minor); 0 if no display is found and EGL_VERSION_1_5 is not defined. */
+    if (display == NULL) {
+        display = EGL_NO_DISPLAY; /* this is usually NULL, better safe than sorry */
+    }
+    if (display == EGL_NO_DISPLAY) {
+        display = eglGetCurrentDisplay();
+    }
+#ifdef EGL_VERSION_1_4
+    if (display == EGL_NO_DISPLAY) {
+        display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
+    }
+#endif
+#ifndef EGL_VERSION_1_5
+    if (display == EGL_NO_DISPLAY) {
+        return 0;
+    }
+#endif
+
+    version = eglQueryString(display, EGL_VERSION);
+    (void) eglGetError(); /* return value deliberately ignored */
+
+    /* Assume EGL 1.0 when the version string is missing or does not parse as
+     * "major.minor", so major/minor are never read while uninitialized. */
+    if (version == NULL || GLAD_IMPL_UTIL_SSCANF(version, "%d.%d", &major, &minor) != 2) {
+        major = 1;
+        minor = 0;
+    }
+
+    GLAD_EGL_VERSION_1_0 = (major == 1 && minor >= 0) || major > 1;
+    GLAD_EGL_VERSION_1_1 = (major == 1 && minor >= 1) || major > 1;
+    GLAD_EGL_VERSION_1_2 = (major == 1 && minor >= 2) || major > 1;
+    GLAD_EGL_VERSION_1_3 = (major == 1 && minor >= 3) || major > 1;
+    GLAD_EGL_VERSION_1_4 = (major == 1 && minor >= 4) || major > 1;
+    GLAD_EGL_VERSION_1_5 = (major == 1 && minor >= 5) || major > 1;
+
+    return GLAD_MAKE_VERSION(major, minor);
+}
+
+int gladLoadEGLUserPtr(EGLDisplay display, GLADuserptrloadfunc load, void* userptr) { /* Load EGL through load(userptr, name); returns the detected version, or 0 on failure. */
+    int version;
+    eglGetDisplay = (PFNEGLGETDISPLAYPROC) load(userptr, "eglGetDisplay");
+    eglGetCurrentDisplay = (PFNEGLGETCURRENTDISPLAYPROC) load(userptr, "eglGetCurrentDisplay");
+    eglQueryString = (PFNEGLQUERYSTRINGPROC) load(userptr, "eglQueryString");
+    eglGetError = (PFNEGLGETERRORPROC) load(userptr, "eglGetError");
+    if (eglGetDisplay == NULL || eglGetCurrentDisplay == NULL || eglQueryString == NULL || eglGetError == NULL) return 0;
+    /* The version flags must be set first: each per-version loader below returns early when its flag is 0. */
+    version = glad_egl_find_core_egl(display);
+    if (!version) return 0;
+    glad_egl_load_EGL_VERSION_1_0(load, userptr);
+    glad_egl_load_EGL_VERSION_1_1(load, userptr);
+    glad_egl_load_EGL_VERSION_1_2(load, userptr);
+    glad_egl_load_EGL_VERSION_1_4(load, userptr);
+    glad_egl_load_EGL_VERSION_1_5(load, userptr);
+
+    if (!glad_egl_find_extensions_egl(display)) return 0;
+    glad_egl_load_EGL_ANDROID_blob_cache(load, userptr);
+
+
+    return version;
+}
+
+int gladLoadEGL(EGLDisplay display, GLADloadfunc load) { /* Wrapper over gladLoadEGLUserPtr for a loader that takes only the symbol name. */
+    return gladLoadEGLUserPtr(display, glad_egl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load); /* load travels in the userptr slot */
+}
+
+ 
+
+#ifdef GLAD_EGL
+
+#ifndef GLAD_LOADER_LIBRARY_C_
+#define GLAD_LOADER_LIBRARY_C_
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#if GLAD_PLATFORM_WIN32
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+
+static void* glad_get_dlopen_handle(const char *lib_names[], int length) { /* Open the first loadable library among lib_names[0..length-1]; NULL if none loads. */
+    void *handle = NULL;
+    int i;
+    /* Candidates are tried in order; the first handle obtained is returned. */
+    for (i = 0; i < length; ++i) {
+#if GLAD_PLATFORM_WIN32
+  #if GLAD_PLATFORM_UWP
+        size_t buffer_size = (strlen(lib_names[i]) + 1) * sizeof(WCHAR); /* size in bytes */
+        LPWSTR buffer = (LPWSTR) malloc(buffer_size);
+        if (buffer != NULL) {
+            int ret = MultiByteToWideChar(CP_ACP, 0, lib_names[i], -1, buffer, (int) (buffer_size / sizeof(WCHAR))); /* capacity is given in characters, not bytes */
+            if (ret != 0) {
+                handle = (void*) LoadPackagedLibrary(buffer, 0);
+            }
+            free((void*) buffer);
+        }
+  #else
+        handle = (void*) LoadLibraryA(lib_names[i]);
+  #endif
+#else
+        handle = dlopen(lib_names[i], RTLD_LAZY | RTLD_LOCAL);
+#endif
+        if (handle != NULL) {
+            return handle;
+        }
+    }
+
+    return NULL;
+}
+
+static void glad_close_dlopen_handle(void* handle) { /* Close a library handle with FreeLibrary or dlclose; a NULL handle is ignored. */
+    if (handle != NULL) {
+#if GLAD_PLATFORM_WIN32
+        FreeLibrary((HMODULE) handle);
+#else
+        dlclose(handle);
+#endif
+    }
+}
+
+static GLADapiproc glad_dlsym_handle(void* handle, const char *name) { /* Look up name in handle; a NULL handle yields NULL. */
+    if (handle == NULL) {
+        return NULL;
+    }
+    /* GetProcAddress when GLAD_PLATFORM_WIN32 is set, dlsym otherwise. */
+#if GLAD_PLATFORM_WIN32
+    return (GLADapiproc) GetProcAddress((HMODULE) handle, name);
+#else
+    return GLAD_GNUC_EXTENSION (GLADapiproc) dlsym(handle, name);
+#endif
+}
+
+#endif /* GLAD_LOADER_LIBRARY_C_ */
+
+struct _glad_egl_userptr { /* lookup state handed to glad_egl_get_proc through its void* argument */
+    void *handle; /* library handle searched first, via glad_dlsym_handle */
+    PFNEGLGETPROCADDRESSPROC get_proc_address_ptr; /* fallback lookup used when the handle search returns NULL */
+};
+
+static GLADapiproc glad_egl_get_proc(void *vuserptr, const char* name) { /* Resolve name from the library handle, else through the stored get_proc_address_ptr. */
+    struct _glad_egl_userptr userptr = *(struct _glad_egl_userptr*) vuserptr;
+    GLADapiproc result = NULL;
+    /* vuserptr is read as a struct _glad_egl_userptr and copied by value above. */
+    result = glad_dlsym_handle(userptr.handle, name);
+    if (result == NULL) {
+        result = GLAD_GNUC_EXTENSION (GLADapiproc) userptr.get_proc_address_ptr(name);
+    }
+
+    return result;
+}
+
+static void* _egl_handle = NULL;
+
+static void* glad_egl_dlopen_handle(void) { /* Return the cached EGL library handle, opening the library if none is cached. */
+#if GLAD_PLATFORM_APPLE
+    static const char *NAMES[] = {"libEGL.dylib"};
+#elif GLAD_PLATFORM_WIN32
+    static const char *NAMES[] = {"libEGL.dll", "EGL.dll"};
+#else
+    static const char *NAMES[] = {"libEGL.so.1", "libEGL.so"};
+#endif
+    /* A failed open leaves _egl_handle NULL, so a later call tries again. */
+    if (_egl_handle == NULL) {
+        _egl_handle = glad_get_dlopen_handle(NAMES, sizeof(NAMES) / sizeof(NAMES[0]));
+    }
+
+    return _egl_handle;
+}
+
+static struct _glad_egl_userptr glad_egl_build_userptr(void *handle) { /* Bundle handle with the eglGetProcAddress symbol looked up in it. */
+    struct _glad_egl_userptr userptr;
+    userptr.handle = handle;
+    userptr.get_proc_address_ptr = (PFNEGLGETPROCADDRESSPROC) glad_dlsym_handle(handle, "eglGetProcAddress"); /* NULL when the symbol is not found; the caller checks */
+    return userptr;
+}
+
+int gladLoaderLoadEGL(EGLDisplay display) { /* Open the EGL library if needed and load EGL from it; returns the version, or 0 on failure. */
+    int version = 0;
+    void *handle = NULL;
+    int did_load = 0;
+    struct _glad_egl_userptr userptr;
+    /* did_load is nonzero when no handle was cached before this call. */
+    did_load = _egl_handle == NULL;
+    handle = glad_egl_dlopen_handle();
+    if (handle != NULL) {
+        userptr = glad_egl_build_userptr(handle);
+
+        if (userptr.get_proc_address_ptr != NULL) {
+            version = gladLoadEGLUserPtr(display, glad_egl_get_proc, &userptr);
+        }
+        /* Loading failed on a handle this call opened: close it again. */
+        if (!version && did_load) {
+            gladLoaderUnloadEGL();
+        }
+    }
+
+    return version;
+}
+
+
+void gladLoaderUnloadEGL(void) { /* Close the cached EGL library handle, if any, and forget it. */
+    if (_egl_handle != NULL) {
+        glad_close_dlopen_handle(_egl_handle);
+        _egl_handle = NULL; /* lets glad_egl_dlopen_handle reopen the library later */
+    }
+}
+
+#endif /* GLAD_EGL */
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/thirdparty/glad/gl.c b/thirdparty/glad/gl.c
index 8d12541ed4..ee0cc188fc 100644
--- a/thirdparty/glad/gl.c
+++ b/thirdparty/glad/gl.c
@@ -35,6 +35,10 @@ int GLAD_GL_VERSION_3_0 = 0;
 int GLAD_GL_VERSION_3_1 = 0;
 int GLAD_GL_VERSION_3_2 = 0;
 int GLAD_GL_VERSION_3_3 = 0;
+int GLAD_GL_ES_VERSION_2_0 = 0; /* tested by glad_gl_load_GL_ES_VERSION_2_0, which returns early while this is 0 */
+int GLAD_GL_ES_VERSION_3_0 = 0; /* tested by glad_gl_load_GL_ES_VERSION_3_0, which returns early while this is 0 */
+int GLAD_GL_ES_VERSION_3_1 = 0; /* tested by glad_gl_load_GL_ES_VERSION_3_1, which returns early while this is 0 */
+int GLAD_GL_ES_VERSION_3_2 = 0; /* presumably gates a matching 3.2 loader not visible in this hunk - TODO confirm */
 int GLAD_GL_ARB_debug_output = 0;
 int GLAD_GL_ARB_framebuffer_object = 0;
 int GLAD_GL_ARB_get_program_binary = 0;
@@ -797,6 +801,111 @@ PFNGLWINDOWPOS3IPROC glad_glWindowPos3i = NULL;
 PFNGLWINDOWPOS3IVPROC glad_glWindowPos3iv = NULL;
 PFNGLWINDOWPOS3SPROC glad_glWindowPos3s = NULL;
 PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv = NULL;
+PFNGLACTIVESHADERPROGRAMPROC glad_glActiveShaderProgram = NULL; /* first of the function pointers added by this hunk; each stays NULL until a glad_gl_load_* function assigns it */
+PFNGLBINDIMAGETEXTUREPROC glad_glBindImageTexture = NULL;
+PFNGLBINDPROGRAMPIPELINEPROC glad_glBindProgramPipeline = NULL;
+PFNGLBINDTRANSFORMFEEDBACKPROC glad_glBindTransformFeedback = NULL;
+PFNGLBINDVERTEXBUFFERPROC glad_glBindVertexBuffer = NULL;
+PFNGLBLENDBARRIERPROC glad_glBlendBarrier = NULL;
+PFNGLBLENDEQUATIONSEPARATEIPROC glad_glBlendEquationSeparatei = NULL;
+PFNGLBLENDEQUATIONIPROC glad_glBlendEquationi = NULL;
+PFNGLBLENDFUNCSEPARATEIPROC glad_glBlendFuncSeparatei = NULL;
+PFNGLBLENDFUNCIPROC glad_glBlendFunci = NULL;
+PFNGLCLEARDEPTHFPROC glad_glClearDepthf = NULL;
+PFNGLCOPYIMAGESUBDATAPROC glad_glCopyImageSubData = NULL;
+PFNGLCREATESHADERPROGRAMVPROC glad_glCreateShaderProgramv = NULL;
+PFNGLDEBUGMESSAGECALLBACKPROC glad_glDebugMessageCallback = NULL;
+PFNGLDEBUGMESSAGECONTROLPROC glad_glDebugMessageControl = NULL;
+PFNGLDEBUGMESSAGEINSERTPROC glad_glDebugMessageInsert = NULL;
+PFNGLDELETEPROGRAMPIPELINESPROC glad_glDeleteProgramPipelines = NULL;
+PFNGLDELETETRANSFORMFEEDBACKSPROC glad_glDeleteTransformFeedbacks = NULL;
+PFNGLDEPTHRANGEFPROC glad_glDepthRangef = NULL;
+PFNGLDISPATCHCOMPUTEPROC glad_glDispatchCompute = NULL;
+PFNGLDISPATCHCOMPUTEINDIRECTPROC glad_glDispatchComputeIndirect = NULL;
+PFNGLDRAWARRAYSINDIRECTPROC glad_glDrawArraysIndirect = NULL;
+PFNGLDRAWELEMENTSINDIRECTPROC glad_glDrawElementsIndirect = NULL;
+PFNGLFRAMEBUFFERPARAMETERIPROC glad_glFramebufferParameteri = NULL;
+PFNGLGENPROGRAMPIPELINESPROC glad_glGenProgramPipelines = NULL;
+PFNGLGENTRANSFORMFEEDBACKSPROC glad_glGenTransformFeedbacks = NULL;
+PFNGLGETDEBUGMESSAGELOGPROC glad_glGetDebugMessageLog = NULL;
+PFNGLGETFRAMEBUFFERPARAMETERIVPROC glad_glGetFramebufferParameteriv = NULL;
+PFNGLGETGRAPHICSRESETSTATUSPROC glad_glGetGraphicsResetStatus = NULL;
+PFNGLGETINTERNALFORMATIVPROC glad_glGetInternalformativ = NULL;
+PFNGLGETOBJECTLABELPROC glad_glGetObjectLabel = NULL;
+PFNGLGETOBJECTPTRLABELPROC glad_glGetObjectPtrLabel = NULL;
+PFNGLGETPROGRAMINTERFACEIVPROC glad_glGetProgramInterfaceiv = NULL;
+PFNGLGETPROGRAMPIPELINEINFOLOGPROC glad_glGetProgramPipelineInfoLog = NULL;
+PFNGLGETPROGRAMPIPELINEIVPROC glad_glGetProgramPipelineiv = NULL;
+PFNGLGETPROGRAMRESOURCEINDEXPROC glad_glGetProgramResourceIndex = NULL;
+PFNGLGETPROGRAMRESOURCELOCATIONPROC glad_glGetProgramResourceLocation = NULL;
+PFNGLGETPROGRAMRESOURCENAMEPROC glad_glGetProgramResourceName = NULL;
+PFNGLGETPROGRAMRESOURCEIVPROC glad_glGetProgramResourceiv = NULL;
+PFNGLGETSHADERPRECISIONFORMATPROC glad_glGetShaderPrecisionFormat = NULL;
+PFNGLGETNUNIFORMFVPROC glad_glGetnUniformfv = NULL;
+PFNGLGETNUNIFORMIVPROC glad_glGetnUniformiv = NULL;
+PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv = NULL;
+PFNGLINVALIDATEFRAMEBUFFERPROC glad_glInvalidateFramebuffer = NULL;
+PFNGLINVALIDATESUBFRAMEBUFFERPROC glad_glInvalidateSubFramebuffer = NULL;
+PFNGLISPROGRAMPIPELINEPROC glad_glIsProgramPipeline = NULL;
+PFNGLISTRANSFORMFEEDBACKPROC glad_glIsTransformFeedback = NULL;
+PFNGLMEMORYBARRIERPROC glad_glMemoryBarrier = NULL;
+PFNGLMEMORYBARRIERBYREGIONPROC glad_glMemoryBarrierByRegion = NULL;
+PFNGLMINSAMPLESHADINGPROC glad_glMinSampleShading = NULL;
+PFNGLOBJECTLABELPROC glad_glObjectLabel = NULL;
+PFNGLOBJECTPTRLABELPROC glad_glObjectPtrLabel = NULL;
+PFNGLPATCHPARAMETERIPROC glad_glPatchParameteri = NULL;
+PFNGLPAUSETRANSFORMFEEDBACKPROC glad_glPauseTransformFeedback = NULL;
+PFNGLPOPDEBUGGROUPPROC glad_glPopDebugGroup = NULL;
+PFNGLPRIMITIVEBOUNDINGBOXPROC glad_glPrimitiveBoundingBox = NULL;
+PFNGLPROGRAMUNIFORM1FPROC glad_glProgramUniform1f = NULL;
+PFNGLPROGRAMUNIFORM1FVPROC glad_glProgramUniform1fv = NULL;
+PFNGLPROGRAMUNIFORM1IPROC glad_glProgramUniform1i = NULL;
+PFNGLPROGRAMUNIFORM1IVPROC glad_glProgramUniform1iv = NULL;
+PFNGLPROGRAMUNIFORM1UIPROC glad_glProgramUniform1ui = NULL;
+PFNGLPROGRAMUNIFORM1UIVPROC glad_glProgramUniform1uiv = NULL;
+PFNGLPROGRAMUNIFORM2FPROC glad_glProgramUniform2f = NULL;
+PFNGLPROGRAMUNIFORM2FVPROC glad_glProgramUniform2fv = NULL;
+PFNGLPROGRAMUNIFORM2IPROC glad_glProgramUniform2i = NULL;
+PFNGLPROGRAMUNIFORM2IVPROC glad_glProgramUniform2iv = NULL;
+PFNGLPROGRAMUNIFORM2UIPROC glad_glProgramUniform2ui = NULL;
+PFNGLPROGRAMUNIFORM2UIVPROC glad_glProgramUniform2uiv = NULL;
+PFNGLPROGRAMUNIFORM3FPROC glad_glProgramUniform3f = NULL;
+PFNGLPROGRAMUNIFORM3FVPROC glad_glProgramUniform3fv = NULL;
+PFNGLPROGRAMUNIFORM3IPROC glad_glProgramUniform3i = NULL;
+PFNGLPROGRAMUNIFORM3IVPROC glad_glProgramUniform3iv = NULL;
+PFNGLPROGRAMUNIFORM3UIPROC glad_glProgramUniform3ui = NULL;
+PFNGLPROGRAMUNIFORM3UIVPROC glad_glProgramUniform3uiv = NULL;
+PFNGLPROGRAMUNIFORM4FPROC glad_glProgramUniform4f = NULL;
+PFNGLPROGRAMUNIFORM4FVPROC glad_glProgramUniform4fv = NULL;
+PFNGLPROGRAMUNIFORM4IPROC glad_glProgramUniform4i = NULL;
+PFNGLPROGRAMUNIFORM4IVPROC glad_glProgramUniform4iv = NULL;
+PFNGLPROGRAMUNIFORM4UIPROC glad_glProgramUniform4ui = NULL;
+PFNGLPROGRAMUNIFORM4UIVPROC glad_glProgramUniform4uiv = NULL;
+PFNGLPROGRAMUNIFORMMATRIX2FVPROC glad_glProgramUniformMatrix2fv = NULL;
+PFNGLPROGRAMUNIFORMMATRIX2X3FVPROC glad_glProgramUniformMatrix2x3fv = NULL;
+PFNGLPROGRAMUNIFORMMATRIX2X4FVPROC glad_glProgramUniformMatrix2x4fv = NULL;
+PFNGLPROGRAMUNIFORMMATRIX3FVPROC glad_glProgramUniformMatrix3fv = NULL;
+PFNGLPROGRAMUNIFORMMATRIX3X2FVPROC glad_glProgramUniformMatrix3x2fv = NULL;
+PFNGLPROGRAMUNIFORMMATRIX3X4FVPROC glad_glProgramUniformMatrix3x4fv = NULL;
+PFNGLPROGRAMUNIFORMMATRIX4FVPROC glad_glProgramUniformMatrix4fv = NULL;
+PFNGLPROGRAMUNIFORMMATRIX4X2FVPROC glad_glProgramUniformMatrix4x2fv = NULL;
+PFNGLPROGRAMUNIFORMMATRIX4X3FVPROC glad_glProgramUniformMatrix4x3fv = NULL;
+PFNGLPUSHDEBUGGROUPPROC glad_glPushDebugGroup = NULL;
+PFNGLREADNPIXELSPROC glad_glReadnPixels = NULL;
+PFNGLRELEASESHADERCOMPILERPROC glad_glReleaseShaderCompiler = NULL;
+PFNGLRESUMETRANSFORMFEEDBACKPROC glad_glResumeTransformFeedback = NULL;
+PFNGLSHADERBINARYPROC glad_glShaderBinary = NULL;
+PFNGLTEXBUFFERRANGEPROC glad_glTexBufferRange = NULL;
+PFNGLTEXSTORAGE2DPROC glad_glTexStorage2D = NULL;
+PFNGLTEXSTORAGE2DMULTISAMPLEPROC glad_glTexStorage2DMultisample = NULL;
+PFNGLTEXSTORAGE3DPROC glad_glTexStorage3D = NULL;
+PFNGLTEXSTORAGE3DMULTISAMPLEPROC glad_glTexStorage3DMultisample = NULL;
+PFNGLUSEPROGRAMSTAGESPROC glad_glUseProgramStages = NULL;
+PFNGLVALIDATEPROGRAMPIPELINEPROC glad_glValidateProgramPipeline = NULL;
+PFNGLVERTEXATTRIBBINDINGPROC glad_glVertexAttribBinding = NULL;
+PFNGLVERTEXATTRIBFORMATPROC glad_glVertexAttribFormat = NULL;
+PFNGLVERTEXATTRIBIFORMATPROC glad_glVertexAttribIFormat = NULL;
+PFNGLVERTEXBINDINGDIVISORPROC glad_glVertexBindingDivisor = NULL;
 
 
 static void glad_gl_load_GL_VERSION_1_0( GLADuserptrloadfunc load, void* userptr) {
@@ -1562,6 +1671,376 @@ static void glad_gl_load_GL_VERSION_3_3( GLADuserptrloadfunc load, void* userptr
     glad_glVertexP4ui = (PFNGLVERTEXP4UIPROC) load(userptr, "glVertexP4ui");
     glad_glVertexP4uiv = (PFNGLVERTEXP4UIVPROC) load(userptr, "glVertexP4uiv");
 }
+static void glad_gl_load_GL_ES_VERSION_2_0( GLADuserptrloadfunc load, void* userptr) { /* Fills the glad_gl* pointers below by calling load(userptr, name) once per entry point. */
+    if(!GLAD_GL_ES_VERSION_2_0) return; /* nothing is loaded unless the GLAD_GL_ES_VERSION_2_0 flag is non-zero */
+    glad_glActiveTexture = (PFNGLACTIVETEXTUREPROC) load(userptr, "glActiveTexture");
+    glad_glAttachShader = (PFNGLATTACHSHADERPROC) load(userptr, "glAttachShader");
+    glad_glBindAttribLocation = (PFNGLBINDATTRIBLOCATIONPROC) load(userptr, "glBindAttribLocation");
+    glad_glBindBuffer = (PFNGLBINDBUFFERPROC) load(userptr, "glBindBuffer");
+    glad_glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC) load(userptr, "glBindFramebuffer");
+    glad_glBindRenderbuffer = (PFNGLBINDRENDERBUFFERPROC) load(userptr, "glBindRenderbuffer");
+    glad_glBindTexture = (PFNGLBINDTEXTUREPROC) load(userptr, "glBindTexture");
+    glad_glBlendColor = (PFNGLBLENDCOLORPROC) load(userptr, "glBlendColor");
+    glad_glBlendEquation = (PFNGLBLENDEQUATIONPROC) load(userptr, "glBlendEquation");
+    glad_glBlendEquationSeparate = (PFNGLBLENDEQUATIONSEPARATEPROC) load(userptr, "glBlendEquationSeparate");
+    glad_glBlendFunc = (PFNGLBLENDFUNCPROC) load(userptr, "glBlendFunc");
+    glad_glBlendFuncSeparate = (PFNGLBLENDFUNCSEPARATEPROC) load(userptr, "glBlendFuncSeparate");
+    glad_glBufferData = (PFNGLBUFFERDATAPROC) load(userptr, "glBufferData");
+    glad_glBufferSubData = (PFNGLBUFFERSUBDATAPROC) load(userptr, "glBufferSubData");
+    glad_glCheckFramebufferStatus = (PFNGLCHECKFRAMEBUFFERSTATUSPROC) load(userptr, "glCheckFramebufferStatus");
+    glad_glClear = (PFNGLCLEARPROC) load(userptr, "glClear");
+    glad_glClearColor = (PFNGLCLEARCOLORPROC) load(userptr, "glClearColor");
+    glad_glClearDepthf = (PFNGLCLEARDEPTHFPROC) load(userptr, "glClearDepthf");
+    glad_glClearStencil = (PFNGLCLEARSTENCILPROC) load(userptr, "glClearStencil");
+    glad_glColorMask = (PFNGLCOLORMASKPROC) load(userptr, "glColorMask");
+    glad_glCompileShader = (PFNGLCOMPILESHADERPROC) load(userptr, "glCompileShader");
+    glad_glCompressedTexImage2D = (PFNGLCOMPRESSEDTEXIMAGE2DPROC) load(userptr, "glCompressedTexImage2D");
+    glad_glCompressedTexSubImage2D = (PFNGLCOMPRESSEDTEXSUBIMAGE2DPROC) load(userptr, "glCompressedTexSubImage2D");
+    glad_glCopyTexImage2D = (PFNGLCOPYTEXIMAGE2DPROC) load(userptr, "glCopyTexImage2D");
+    glad_glCopyTexSubImage2D = (PFNGLCOPYTEXSUBIMAGE2DPROC) load(userptr, "glCopyTexSubImage2D");
+    glad_glCreateProgram = (PFNGLCREATEPROGRAMPROC) load(userptr, "glCreateProgram");
+    glad_glCreateShader = (PFNGLCREATESHADERPROC) load(userptr, "glCreateShader");
+    glad_glCullFace = (PFNGLCULLFACEPROC) load(userptr, "glCullFace");
+    glad_glDeleteBuffers = (PFNGLDELETEBUFFERSPROC) load(userptr, "glDeleteBuffers");
+    glad_glDeleteFramebuffers = (PFNGLDELETEFRAMEBUFFERSPROC) load(userptr, "glDeleteFramebuffers");
+    glad_glDeleteProgram = (PFNGLDELETEPROGRAMPROC) load(userptr, "glDeleteProgram");
+    glad_glDeleteRenderbuffers = (PFNGLDELETERENDERBUFFERSPROC) load(userptr, "glDeleteRenderbuffers");
+    glad_glDeleteShader = (PFNGLDELETESHADERPROC) load(userptr, "glDeleteShader");
+    glad_glDeleteTextures = (PFNGLDELETETEXTURESPROC) load(userptr, "glDeleteTextures");
+    glad_glDepthFunc = (PFNGLDEPTHFUNCPROC) load(userptr, "glDepthFunc");
+    glad_glDepthMask = (PFNGLDEPTHMASKPROC) load(userptr, "glDepthMask");
+    glad_glDepthRangef = (PFNGLDEPTHRANGEFPROC) load(userptr, "glDepthRangef");
+    glad_glDetachShader = (PFNGLDETACHSHADERPROC) load(userptr, "glDetachShader");
+    glad_glDisable = (PFNGLDISABLEPROC) load(userptr, "glDisable");
+    glad_glDisableVertexAttribArray = (PFNGLDISABLEVERTEXATTRIBARRAYPROC) load(userptr, "glDisableVertexAttribArray");
+    glad_glDrawArrays = (PFNGLDRAWARRAYSPROC) load(userptr, "glDrawArrays");
+    glad_glDrawElements = (PFNGLDRAWELEMENTSPROC) load(userptr, "glDrawElements");
+    glad_glEnable = (PFNGLENABLEPROC) load(userptr, "glEnable");
+    glad_glEnableVertexAttribArray = (PFNGLENABLEVERTEXATTRIBARRAYPROC) load(userptr, "glEnableVertexAttribArray");
+    glad_glFinish = (PFNGLFINISHPROC) load(userptr, "glFinish");
+    glad_glFlush = (PFNGLFLUSHPROC) load(userptr, "glFlush");
+    glad_glFramebufferRenderbuffer = (PFNGLFRAMEBUFFERRENDERBUFFERPROC) load(userptr, "glFramebufferRenderbuffer");
+    glad_glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC) load(userptr, "glFramebufferTexture2D");
+    glad_glFrontFace = (PFNGLFRONTFACEPROC) load(userptr, "glFrontFace");
+    glad_glGenBuffers = (PFNGLGENBUFFERSPROC) load(userptr, "glGenBuffers");
+    glad_glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC) load(userptr, "glGenFramebuffers");
+    glad_glGenRenderbuffers = (PFNGLGENRENDERBUFFERSPROC) load(userptr, "glGenRenderbuffers");
+    glad_glGenTextures = (PFNGLGENTEXTURESPROC) load(userptr, "glGenTextures");
+    glad_glGenerateMipmap = (PFNGLGENERATEMIPMAPPROC) load(userptr, "glGenerateMipmap");
+    glad_glGetActiveAttrib = (PFNGLGETACTIVEATTRIBPROC) load(userptr, "glGetActiveAttrib");
+    glad_glGetActiveUniform = (PFNGLGETACTIVEUNIFORMPROC) load(userptr, "glGetActiveUniform");
+    glad_glGetAttachedShaders = (PFNGLGETATTACHEDSHADERSPROC) load(userptr, "glGetAttachedShaders");
+    glad_glGetAttribLocation = (PFNGLGETATTRIBLOCATIONPROC) load(userptr, "glGetAttribLocation");
+    glad_glGetBooleanv = (PFNGLGETBOOLEANVPROC) load(userptr, "glGetBooleanv");
+    glad_glGetBufferParameteriv = (PFNGLGETBUFFERPARAMETERIVPROC) load(userptr, "glGetBufferParameteriv");
+    glad_glGetError = (PFNGLGETERRORPROC) load(userptr, "glGetError");
+    glad_glGetFloatv = (PFNGLGETFLOATVPROC) load(userptr, "glGetFloatv");
+    glad_glGetFramebufferAttachmentParameteriv = (PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVPROC) load(userptr, "glGetFramebufferAttachmentParameteriv");
+    glad_glGetIntegerv = (PFNGLGETINTEGERVPROC) load(userptr, "glGetIntegerv");
+    glad_glGetProgramInfoLog = (PFNGLGETPROGRAMINFOLOGPROC) load(userptr, "glGetProgramInfoLog");
+    glad_glGetProgramiv = (PFNGLGETPROGRAMIVPROC) load(userptr, "glGetProgramiv");
+    glad_glGetRenderbufferParameteriv = (PFNGLGETRENDERBUFFERPARAMETERIVPROC) load(userptr, "glGetRenderbufferParameteriv");
+    glad_glGetShaderInfoLog = (PFNGLGETSHADERINFOLOGPROC) load(userptr, "glGetShaderInfoLog");
+    glad_glGetShaderPrecisionFormat = (PFNGLGETSHADERPRECISIONFORMATPROC) load(userptr, "glGetShaderPrecisionFormat");
+    glad_glGetShaderSource = (PFNGLGETSHADERSOURCEPROC) load(userptr, "glGetShaderSource");
+    glad_glGetShaderiv = (PFNGLGETSHADERIVPROC) load(userptr, "glGetShaderiv");
+    glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString");
+    glad_glGetTexParameterfv = (PFNGLGETTEXPARAMETERFVPROC) load(userptr, "glGetTexParameterfv");
+    glad_glGetTexParameteriv = (PFNGLGETTEXPARAMETERIVPROC) load(userptr, "glGetTexParameteriv");
+    glad_glGetUniformLocation = (PFNGLGETUNIFORMLOCATIONPROC) load(userptr, "glGetUniformLocation");
+    glad_glGetUniformfv = (PFNGLGETUNIFORMFVPROC) load(userptr, "glGetUniformfv");
+    glad_glGetUniformiv = (PFNGLGETUNIFORMIVPROC) load(userptr, "glGetUniformiv");
+    glad_glGetVertexAttribPointerv = (PFNGLGETVERTEXATTRIBPOINTERVPROC) load(userptr, "glGetVertexAttribPointerv");
+    glad_glGetVertexAttribfv = (PFNGLGETVERTEXATTRIBFVPROC) load(userptr, "glGetVertexAttribfv");
+    glad_glGetVertexAttribiv = (PFNGLGETVERTEXATTRIBIVPROC) load(userptr, "glGetVertexAttribiv");
+    glad_glHint = (PFNGLHINTPROC) load(userptr, "glHint");
+    glad_glIsBuffer = (PFNGLISBUFFERPROC) load(userptr, "glIsBuffer");
+    glad_glIsEnabled = (PFNGLISENABLEDPROC) load(userptr, "glIsEnabled");
+    glad_glIsFramebuffer = (PFNGLISFRAMEBUFFERPROC) load(userptr, "glIsFramebuffer");
+    glad_glIsProgram = (PFNGLISPROGRAMPROC) load(userptr, "glIsProgram");
+    glad_glIsRenderbuffer = (PFNGLISRENDERBUFFERPROC) load(userptr, "glIsRenderbuffer");
+    glad_glIsShader = (PFNGLISSHADERPROC) load(userptr, "glIsShader");
+    glad_glIsTexture = (PFNGLISTEXTUREPROC) load(userptr, "glIsTexture");
+    glad_glLineWidth = (PFNGLLINEWIDTHPROC) load(userptr, "glLineWidth");
+    glad_glLinkProgram = (PFNGLLINKPROGRAMPROC) load(userptr, "glLinkProgram");
+    glad_glPixelStorei = (PFNGLPIXELSTOREIPROC) load(userptr, "glPixelStorei");
+    glad_glPolygonOffset = (PFNGLPOLYGONOFFSETPROC) load(userptr, "glPolygonOffset");
+    glad_glReadPixels = (PFNGLREADPIXELSPROC) load(userptr, "glReadPixels");
+    glad_glReleaseShaderCompiler = (PFNGLRELEASESHADERCOMPILERPROC) load(userptr, "glReleaseShaderCompiler");
+    glad_glRenderbufferStorage = (PFNGLRENDERBUFFERSTORAGEPROC) load(userptr, "glRenderbufferStorage");
+    glad_glSampleCoverage = (PFNGLSAMPLECOVERAGEPROC) load(userptr, "glSampleCoverage");
+    glad_glScissor = (PFNGLSCISSORPROC) load(userptr, "glScissor");
+    glad_glShaderBinary = (PFNGLSHADERBINARYPROC) load(userptr, "glShaderBinary");
+    glad_glShaderSource = (PFNGLSHADERSOURCEPROC) load(userptr, "glShaderSource");
+    glad_glStencilFunc = (PFNGLSTENCILFUNCPROC) load(userptr, "glStencilFunc");
+    glad_glStencilFuncSeparate = (PFNGLSTENCILFUNCSEPARATEPROC) load(userptr, "glStencilFuncSeparate");
+    glad_glStencilMask = (PFNGLSTENCILMASKPROC) load(userptr, "glStencilMask");
+    glad_glStencilMaskSeparate = (PFNGLSTENCILMASKSEPARATEPROC) load(userptr, "glStencilMaskSeparate");
+    glad_glStencilOp = (PFNGLSTENCILOPPROC) load(userptr, "glStencilOp");
+    glad_glStencilOpSeparate = (PFNGLSTENCILOPSEPARATEPROC) load(userptr, "glStencilOpSeparate");
+    glad_glTexImage2D = (PFNGLTEXIMAGE2DPROC) load(userptr, "glTexImage2D");
+    glad_glTexParameterf = (PFNGLTEXPARAMETERFPROC) load(userptr, "glTexParameterf");
+    glad_glTexParameterfv = (PFNGLTEXPARAMETERFVPROC) load(userptr, "glTexParameterfv");
+    glad_glTexParameteri = (PFNGLTEXPARAMETERIPROC) load(userptr, "glTexParameteri");
+    glad_glTexParameteriv = (PFNGLTEXPARAMETERIVPROC) load(userptr, "glTexParameteriv");
+    glad_glTexSubImage2D = (PFNGLTEXSUBIMAGE2DPROC) load(userptr, "glTexSubImage2D");
+    glad_glUniform1f = (PFNGLUNIFORM1FPROC) load(userptr, "glUniform1f");
+    glad_glUniform1fv = (PFNGLUNIFORM1FVPROC) load(userptr, "glUniform1fv");
+    glad_glUniform1i = (PFNGLUNIFORM1IPROC) load(userptr, "glUniform1i");
+    glad_glUniform1iv = (PFNGLUNIFORM1IVPROC) load(userptr, "glUniform1iv");
+    glad_glUniform2f = (PFNGLUNIFORM2FPROC) load(userptr, "glUniform2f");
+    glad_glUniform2fv = (PFNGLUNIFORM2FVPROC) load(userptr, "glUniform2fv");
+    glad_glUniform2i = (PFNGLUNIFORM2IPROC) load(userptr, "glUniform2i");
+    glad_glUniform2iv = (PFNGLUNIFORM2IVPROC) load(userptr, "glUniform2iv");
+    glad_glUniform3f = (PFNGLUNIFORM3FPROC) load(userptr, "glUniform3f");
+    glad_glUniform3fv = (PFNGLUNIFORM3FVPROC) load(userptr, "glUniform3fv");
+    glad_glUniform3i = (PFNGLUNIFORM3IPROC) load(userptr, "glUniform3i");
+    glad_glUniform3iv = (PFNGLUNIFORM3IVPROC) load(userptr, "glUniform3iv");
+    glad_glUniform4f = (PFNGLUNIFORM4FPROC) load(userptr, "glUniform4f");
+    glad_glUniform4fv = (PFNGLUNIFORM4FVPROC) load(userptr, "glUniform4fv");
+    glad_glUniform4i = (PFNGLUNIFORM4IPROC) load(userptr, "glUniform4i");
+    glad_glUniform4iv = (PFNGLUNIFORM4IVPROC) load(userptr, "glUniform4iv");
+    glad_glUniformMatrix2fv = (PFNGLUNIFORMMATRIX2FVPROC) load(userptr, "glUniformMatrix2fv");
+    glad_glUniformMatrix3fv = (PFNGLUNIFORMMATRIX3FVPROC) load(userptr, "glUniformMatrix3fv");
+    glad_glUniformMatrix4fv = (PFNGLUNIFORMMATRIX4FVPROC) load(userptr, "glUniformMatrix4fv");
+    glad_glUseProgram = (PFNGLUSEPROGRAMPROC) load(userptr, "glUseProgram");
+    glad_glValidateProgram = (PFNGLVALIDATEPROGRAMPROC) load(userptr, "glValidateProgram");
+    glad_glVertexAttrib1f = (PFNGLVERTEXATTRIB1FPROC) load(userptr, "glVertexAttrib1f");
+    glad_glVertexAttrib1fv = (PFNGLVERTEXATTRIB1FVPROC) load(userptr, "glVertexAttrib1fv");
+    glad_glVertexAttrib2f = (PFNGLVERTEXATTRIB2FPROC) load(userptr, "glVertexAttrib2f");
+    glad_glVertexAttrib2fv = (PFNGLVERTEXATTRIB2FVPROC) load(userptr, "glVertexAttrib2fv");
+    glad_glVertexAttrib3f = (PFNGLVERTEXATTRIB3FPROC) load(userptr, "glVertexAttrib3f");
+    glad_glVertexAttrib3fv = (PFNGLVERTEXATTRIB3FVPROC) load(userptr, "glVertexAttrib3fv");
+    glad_glVertexAttrib4f = (PFNGLVERTEXATTRIB4FPROC) load(userptr, "glVertexAttrib4f");
+    glad_glVertexAttrib4fv = (PFNGLVERTEXATTRIB4FVPROC) load(userptr, "glVertexAttrib4fv");
+    glad_glVertexAttribPointer = (PFNGLVERTEXATTRIBPOINTERPROC) load(userptr, "glVertexAttribPointer");
+    glad_glViewport = (PFNGLVIEWPORTPROC) load(userptr, "glViewport");
+}
+static void glad_gl_load_GL_ES_VERSION_3_0( GLADuserptrloadfunc load, void* userptr) { /* Fills the glad_gl* pointers below by calling load(userptr, name) once per entry point. */
+    if(!GLAD_GL_ES_VERSION_3_0) return; /* nothing is loaded unless the GLAD_GL_ES_VERSION_3_0 flag is non-zero */
+    glad_glBeginQuery = (PFNGLBEGINQUERYPROC) load(userptr, "glBeginQuery");
+    glad_glBeginTransformFeedback = (PFNGLBEGINTRANSFORMFEEDBACKPROC) load(userptr, "glBeginTransformFeedback");
+    glad_glBindBufferBase = (PFNGLBINDBUFFERBASEPROC) load(userptr, "glBindBufferBase");
+    glad_glBindBufferRange = (PFNGLBINDBUFFERRANGEPROC) load(userptr, "glBindBufferRange");
+    glad_glBindSampler = (PFNGLBINDSAMPLERPROC) load(userptr, "glBindSampler");
+    glad_glBindTransformFeedback = (PFNGLBINDTRANSFORMFEEDBACKPROC) load(userptr, "glBindTransformFeedback");
+    glad_glBindVertexArray = (PFNGLBINDVERTEXARRAYPROC) load(userptr, "glBindVertexArray");
+    glad_glBlitFramebuffer = (PFNGLBLITFRAMEBUFFERPROC) load(userptr, "glBlitFramebuffer");
+    glad_glClearBufferfi = (PFNGLCLEARBUFFERFIPROC) load(userptr, "glClearBufferfi");
+    glad_glClearBufferfv = (PFNGLCLEARBUFFERFVPROC) load(userptr, "glClearBufferfv");
+    glad_glClearBufferiv = (PFNGLCLEARBUFFERIVPROC) load(userptr, "glClearBufferiv");
+    glad_glClearBufferuiv = (PFNGLCLEARBUFFERUIVPROC) load(userptr, "glClearBufferuiv");
+    glad_glClientWaitSync = (PFNGLCLIENTWAITSYNCPROC) load(userptr, "glClientWaitSync");
+    glad_glCompressedTexImage3D = (PFNGLCOMPRESSEDTEXIMAGE3DPROC) load(userptr, "glCompressedTexImage3D");
+    glad_glCompressedTexSubImage3D = (PFNGLCOMPRESSEDTEXSUBIMAGE3DPROC) load(userptr, "glCompressedTexSubImage3D");
+    glad_glCopyBufferSubData = (PFNGLCOPYBUFFERSUBDATAPROC) load(userptr, "glCopyBufferSubData");
+    glad_glCopyTexSubImage3D = (PFNGLCOPYTEXSUBIMAGE3DPROC) load(userptr, "glCopyTexSubImage3D");
+    glad_glDeleteQueries = (PFNGLDELETEQUERIESPROC) load(userptr, "glDeleteQueries");
+    glad_glDeleteSamplers = (PFNGLDELETESAMPLERSPROC) load(userptr, "glDeleteSamplers");
+    glad_glDeleteSync = (PFNGLDELETESYNCPROC) load(userptr, "glDeleteSync");
+    glad_glDeleteTransformFeedbacks = (PFNGLDELETETRANSFORMFEEDBACKSPROC) load(userptr, "glDeleteTransformFeedbacks");
+    glad_glDeleteVertexArrays = (PFNGLDELETEVERTEXARRAYSPROC) load(userptr, "glDeleteVertexArrays");
+    glad_glDrawArraysInstanced = (PFNGLDRAWARRAYSINSTANCEDPROC) load(userptr, "glDrawArraysInstanced");
+    glad_glDrawBuffers = (PFNGLDRAWBUFFERSPROC) load(userptr, "glDrawBuffers");
+    glad_glDrawElementsInstanced = (PFNGLDRAWELEMENTSINSTANCEDPROC) load(userptr, "glDrawElementsInstanced");
+    glad_glDrawRangeElements = (PFNGLDRAWRANGEELEMENTSPROC) load(userptr, "glDrawRangeElements");
+    glad_glEndQuery = (PFNGLENDQUERYPROC) load(userptr, "glEndQuery");
+    glad_glEndTransformFeedback = (PFNGLENDTRANSFORMFEEDBACKPROC) load(userptr, "glEndTransformFeedback");
+    glad_glFenceSync = (PFNGLFENCESYNCPROC) load(userptr, "glFenceSync");
+    glad_glFlushMappedBufferRange = (PFNGLFLUSHMAPPEDBUFFERRANGEPROC) load(userptr, "glFlushMappedBufferRange");
+    glad_glFramebufferTextureLayer = (PFNGLFRAMEBUFFERTEXTURELAYERPROC) load(userptr, "glFramebufferTextureLayer");
+    glad_glGenQueries = (PFNGLGENQUERIESPROC) load(userptr, "glGenQueries");
+    glad_glGenSamplers = (PFNGLGENSAMPLERSPROC) load(userptr, "glGenSamplers");
+    glad_glGenTransformFeedbacks = (PFNGLGENTRANSFORMFEEDBACKSPROC) load(userptr, "glGenTransformFeedbacks");
+    glad_glGenVertexArrays = (PFNGLGENVERTEXARRAYSPROC) load(userptr, "glGenVertexArrays");
+    glad_glGetActiveUniformBlockName = (PFNGLGETACTIVEUNIFORMBLOCKNAMEPROC) load(userptr, "glGetActiveUniformBlockName");
+    glad_glGetActiveUniformBlockiv = (PFNGLGETACTIVEUNIFORMBLOCKIVPROC) load(userptr, "glGetActiveUniformBlockiv");
+    glad_glGetActiveUniformsiv = (PFNGLGETACTIVEUNIFORMSIVPROC) load(userptr, "glGetActiveUniformsiv");
+    glad_glGetBufferParameteri64v = (PFNGLGETBUFFERPARAMETERI64VPROC) load(userptr, "glGetBufferParameteri64v");
+    glad_glGetBufferPointerv = (PFNGLGETBUFFERPOINTERVPROC) load(userptr, "glGetBufferPointerv");
+    glad_glGetFragDataLocation = (PFNGLGETFRAGDATALOCATIONPROC) load(userptr, "glGetFragDataLocation");
+    glad_glGetInteger64i_v = (PFNGLGETINTEGER64I_VPROC) load(userptr, "glGetInteger64i_v");
+    glad_glGetInteger64v = (PFNGLGETINTEGER64VPROC) load(userptr, "glGetInteger64v");
+    glad_glGetIntegeri_v = (PFNGLGETINTEGERI_VPROC) load(userptr, "glGetIntegeri_v");
+    glad_glGetInternalformativ = (PFNGLGETINTERNALFORMATIVPROC) load(userptr, "glGetInternalformativ");
+    glad_glGetProgramBinary = (PFNGLGETPROGRAMBINARYPROC) load(userptr, "glGetProgramBinary");
+    glad_glGetQueryObjectuiv = (PFNGLGETQUERYOBJECTUIVPROC) load(userptr, "glGetQueryObjectuiv");
+    glad_glGetQueryiv = (PFNGLGETQUERYIVPROC) load(userptr, "glGetQueryiv");
+    glad_glGetSamplerParameterfv = (PFNGLGETSAMPLERPARAMETERFVPROC) load(userptr, "glGetSamplerParameterfv");
+    glad_glGetSamplerParameteriv = (PFNGLGETSAMPLERPARAMETERIVPROC) load(userptr, "glGetSamplerParameteriv");
+    glad_glGetStringi = (PFNGLGETSTRINGIPROC) load(userptr, "glGetStringi");
+    glad_glGetSynciv = (PFNGLGETSYNCIVPROC) load(userptr, "glGetSynciv");
+    glad_glGetTransformFeedbackVarying = (PFNGLGETTRANSFORMFEEDBACKVARYINGPROC) load(userptr, "glGetTransformFeedbackVarying");
+    glad_glGetUniformBlockIndex = (PFNGLGETUNIFORMBLOCKINDEXPROC) load(userptr, "glGetUniformBlockIndex");
+    glad_glGetUniformIndices = (PFNGLGETUNIFORMINDICESPROC) load(userptr, "glGetUniformIndices");
+    glad_glGetUniformuiv = (PFNGLGETUNIFORMUIVPROC) load(userptr, "glGetUniformuiv");
+    glad_glGetVertexAttribIiv = (PFNGLGETVERTEXATTRIBIIVPROC) load(userptr, "glGetVertexAttribIiv");
+    glad_glGetVertexAttribIuiv = (PFNGLGETVERTEXATTRIBIUIVPROC) load(userptr, "glGetVertexAttribIuiv");
+    glad_glInvalidateFramebuffer = (PFNGLINVALIDATEFRAMEBUFFERPROC) load(userptr, "glInvalidateFramebuffer");
+    glad_glInvalidateSubFramebuffer = (PFNGLINVALIDATESUBFRAMEBUFFERPROC) load(userptr, "glInvalidateSubFramebuffer");
+    glad_glIsQuery = (PFNGLISQUERYPROC) load(userptr, "glIsQuery");
+    glad_glIsSampler = (PFNGLISSAMPLERPROC) load(userptr, "glIsSampler");
+    glad_glIsSync = (PFNGLISSYNCPROC) load(userptr, "glIsSync");
+    glad_glIsTransformFeedback = (PFNGLISTRANSFORMFEEDBACKPROC) load(userptr, "glIsTransformFeedback");
+    glad_glIsVertexArray = (PFNGLISVERTEXARRAYPROC) load(userptr, "glIsVertexArray");
+    glad_glMapBufferRange = (PFNGLMAPBUFFERRANGEPROC) load(userptr, "glMapBufferRange");
+    glad_glPauseTransformFeedback = (PFNGLPAUSETRANSFORMFEEDBACKPROC) load(userptr, "glPauseTransformFeedback");
+    glad_glProgramBinary = (PFNGLPROGRAMBINARYPROC) load(userptr, "glProgramBinary");
+    glad_glProgramParameteri = (PFNGLPROGRAMPARAMETERIPROC) load(userptr, "glProgramParameteri");
+    glad_glReadBuffer = (PFNGLREADBUFFERPROC) load(userptr, "glReadBuffer");
+    glad_glRenderbufferStorageMultisample = (PFNGLRENDERBUFFERSTORAGEMULTISAMPLEPROC) load(userptr, "glRenderbufferStorageMultisample");
+    glad_glResumeTransformFeedback = (PFNGLRESUMETRANSFORMFEEDBACKPROC) load(userptr, "glResumeTransformFeedback");
+    glad_glSamplerParameterf = (PFNGLSAMPLERPARAMETERFPROC) load(userptr, "glSamplerParameterf");
+    glad_glSamplerParameterfv = (PFNGLSAMPLERPARAMETERFVPROC) load(userptr, "glSamplerParameterfv");
+    glad_glSamplerParameteri = (PFNGLSAMPLERPARAMETERIPROC) load(userptr, "glSamplerParameteri");
+    glad_glSamplerParameteriv = (PFNGLSAMPLERPARAMETERIVPROC) load(userptr, "glSamplerParameteriv");
+    glad_glTexImage3D = (PFNGLTEXIMAGE3DPROC) load(userptr, "glTexImage3D");
+    glad_glTexStorage2D = (PFNGLTEXSTORAGE2DPROC) load(userptr, "glTexStorage2D");
+    glad_glTexStorage3D = (PFNGLTEXSTORAGE3DPROC) load(userptr, "glTexStorage3D");
+    glad_glTexSubImage3D = (PFNGLTEXSUBIMAGE3DPROC) load(userptr, "glTexSubImage3D");
+    glad_glTransformFeedbackVaryings = (PFNGLTRANSFORMFEEDBACKVARYINGSPROC) load(userptr, "glTransformFeedbackVaryings");
+    glad_glUniform1ui = (PFNGLUNIFORM1UIPROC) load(userptr, "glUniform1ui");
+    glad_glUniform1uiv = (PFNGLUNIFORM1UIVPROC) load(userptr, "glUniform1uiv");
+    glad_glUniform2ui = (PFNGLUNIFORM2UIPROC) load(userptr, "glUniform2ui");
+    glad_glUniform2uiv = (PFNGLUNIFORM2UIVPROC) load(userptr, "glUniform2uiv");
+    glad_glUniform3ui = (PFNGLUNIFORM3UIPROC) load(userptr, "glUniform3ui");
+    glad_glUniform3uiv = (PFNGLUNIFORM3UIVPROC) load(userptr, "glUniform3uiv");
+    glad_glUniform4ui = (PFNGLUNIFORM4UIPROC) load(userptr, "glUniform4ui");
+    glad_glUniform4uiv = (PFNGLUNIFORM4UIVPROC) load(userptr, "glUniform4uiv");
+    glad_glUniformBlockBinding = (PFNGLUNIFORMBLOCKBINDINGPROC) load(userptr, "glUniformBlockBinding");
+    glad_glUniformMatrix2x3fv = (PFNGLUNIFORMMATRIX2X3FVPROC) load(userptr, "glUniformMatrix2x3fv");
+    glad_glUniformMatrix2x4fv = (PFNGLUNIFORMMATRIX2X4FVPROC) load(userptr, "glUniformMatrix2x4fv");
+    glad_glUniformMatrix3x2fv = (PFNGLUNIFORMMATRIX3X2FVPROC) load(userptr, "glUniformMatrix3x2fv");
+    glad_glUniformMatrix3x4fv = (PFNGLUNIFORMMATRIX3X4FVPROC) load(userptr, "glUniformMatrix3x4fv");
+    glad_glUniformMatrix4x2fv = (PFNGLUNIFORMMATRIX4X2FVPROC) load(userptr, "glUniformMatrix4x2fv");
+    glad_glUniformMatrix4x3fv = (PFNGLUNIFORMMATRIX4X3FVPROC) load(userptr, "glUniformMatrix4x3fv");
+    glad_glUnmapBuffer = (PFNGLUNMAPBUFFERPROC) load(userptr, "glUnmapBuffer");
+    glad_glVertexAttribDivisor = (PFNGLVERTEXATTRIBDIVISORPROC) load(userptr, "glVertexAttribDivisor");
+    glad_glVertexAttribI4i = (PFNGLVERTEXATTRIBI4IPROC) load(userptr, "glVertexAttribI4i");
+    glad_glVertexAttribI4iv = (PFNGLVERTEXATTRIBI4IVPROC) load(userptr, "glVertexAttribI4iv");
+    glad_glVertexAttribI4ui = (PFNGLVERTEXATTRIBI4UIPROC) load(userptr, "glVertexAttribI4ui");
+    glad_glVertexAttribI4uiv = (PFNGLVERTEXATTRIBI4UIVPROC) load(userptr, "glVertexAttribI4uiv");
+    glad_glVertexAttribIPointer = (PFNGLVERTEXATTRIBIPOINTERPROC) load(userptr, "glVertexAttribIPointer");
+    glad_glWaitSync = (PFNGLWAITSYNCPROC) load(userptr, "glWaitSync");
+}
+static void glad_gl_load_GL_ES_VERSION_3_1( GLADuserptrloadfunc load, void* userptr) { /* fills the glad_gl* pointers below via load(userptr, name) */
+    if(!GLAD_GL_ES_VERSION_3_1) return; /* flag is zero: leave every pointer untouched */
+    glad_glActiveShaderProgram = (PFNGLACTIVESHADERPROGRAMPROC) load(userptr, "glActiveShaderProgram");
+    glad_glBindImageTexture = (PFNGLBINDIMAGETEXTUREPROC) load(userptr, "glBindImageTexture");
+    glad_glBindProgramPipeline = (PFNGLBINDPROGRAMPIPELINEPROC) load(userptr, "glBindProgramPipeline");
+    glad_glBindVertexBuffer = (PFNGLBINDVERTEXBUFFERPROC) load(userptr, "glBindVertexBuffer");
+    glad_glCreateShaderProgramv = (PFNGLCREATESHADERPROGRAMVPROC) load(userptr, "glCreateShaderProgramv");
+    glad_glDeleteProgramPipelines = (PFNGLDELETEPROGRAMPIPELINESPROC) load(userptr, "glDeleteProgramPipelines");
+    glad_glDispatchCompute = (PFNGLDISPATCHCOMPUTEPROC) load(userptr, "glDispatchCompute");
+    glad_glDispatchComputeIndirect = (PFNGLDISPATCHCOMPUTEINDIRECTPROC) load(userptr, "glDispatchComputeIndirect");
+    glad_glDrawArraysIndirect = (PFNGLDRAWARRAYSINDIRECTPROC) load(userptr, "glDrawArraysIndirect");
+    glad_glDrawElementsIndirect = (PFNGLDRAWELEMENTSINDIRECTPROC) load(userptr, "glDrawElementsIndirect");
+    glad_glFramebufferParameteri = (PFNGLFRAMEBUFFERPARAMETERIPROC) load(userptr, "glFramebufferParameteri");
+    glad_glGenProgramPipelines = (PFNGLGENPROGRAMPIPELINESPROC) load(userptr, "glGenProgramPipelines");
+    glad_glGetBooleani_v = (PFNGLGETBOOLEANI_VPROC) load(userptr, "glGetBooleani_v");
+    glad_glGetFramebufferParameteriv = (PFNGLGETFRAMEBUFFERPARAMETERIVPROC) load(userptr, "glGetFramebufferParameteriv");
+    glad_glGetMultisamplefv = (PFNGLGETMULTISAMPLEFVPROC) load(userptr, "glGetMultisamplefv");
+    glad_glGetProgramInterfaceiv = (PFNGLGETPROGRAMINTERFACEIVPROC) load(userptr, "glGetProgramInterfaceiv");
+    glad_glGetProgramPipelineInfoLog = (PFNGLGETPROGRAMPIPELINEINFOLOGPROC) load(userptr, "glGetProgramPipelineInfoLog");
+    glad_glGetProgramPipelineiv = (PFNGLGETPROGRAMPIPELINEIVPROC) load(userptr, "glGetProgramPipelineiv");
+    glad_glGetProgramResourceIndex = (PFNGLGETPROGRAMRESOURCEINDEXPROC) load(userptr, "glGetProgramResourceIndex");
+    glad_glGetProgramResourceLocation = (PFNGLGETPROGRAMRESOURCELOCATIONPROC) load(userptr, "glGetProgramResourceLocation");
+    glad_glGetProgramResourceName = (PFNGLGETPROGRAMRESOURCENAMEPROC) load(userptr, "glGetProgramResourceName");
+    glad_glGetProgramResourceiv = (PFNGLGETPROGRAMRESOURCEIVPROC) load(userptr, "glGetProgramResourceiv");
+    glad_glGetTexLevelParameterfv = (PFNGLGETTEXLEVELPARAMETERFVPROC) load(userptr, "glGetTexLevelParameterfv");
+    glad_glGetTexLevelParameteriv = (PFNGLGETTEXLEVELPARAMETERIVPROC) load(userptr, "glGetTexLevelParameteriv");
+    glad_glIsProgramPipeline = (PFNGLISPROGRAMPIPELINEPROC) load(userptr, "glIsProgramPipeline");
+    glad_glMemoryBarrier = (PFNGLMEMORYBARRIERPROC) load(userptr, "glMemoryBarrier");
+    glad_glMemoryBarrierByRegion = (PFNGLMEMORYBARRIERBYREGIONPROC) load(userptr, "glMemoryBarrierByRegion");
+    glad_glProgramUniform1f = (PFNGLPROGRAMUNIFORM1FPROC) load(userptr, "glProgramUniform1f");
+    glad_glProgramUniform1fv = (PFNGLPROGRAMUNIFORM1FVPROC) load(userptr, "glProgramUniform1fv");
+    glad_glProgramUniform1i = (PFNGLPROGRAMUNIFORM1IPROC) load(userptr, "glProgramUniform1i");
+    glad_glProgramUniform1iv = (PFNGLPROGRAMUNIFORM1IVPROC) load(userptr, "glProgramUniform1iv");
+    glad_glProgramUniform1ui = (PFNGLPROGRAMUNIFORM1UIPROC) load(userptr, "glProgramUniform1ui");
+    glad_glProgramUniform1uiv = (PFNGLPROGRAMUNIFORM1UIVPROC) load(userptr, "glProgramUniform1uiv");
+    glad_glProgramUniform2f = (PFNGLPROGRAMUNIFORM2FPROC) load(userptr, "glProgramUniform2f");
+    glad_glProgramUniform2fv = (PFNGLPROGRAMUNIFORM2FVPROC) load(userptr, "glProgramUniform2fv");
+    glad_glProgramUniform2i = (PFNGLPROGRAMUNIFORM2IPROC) load(userptr, "glProgramUniform2i");
+    glad_glProgramUniform2iv = (PFNGLPROGRAMUNIFORM2IVPROC) load(userptr, "glProgramUniform2iv");
+    glad_glProgramUniform2ui = (PFNGLPROGRAMUNIFORM2UIPROC) load(userptr, "glProgramUniform2ui");
+    glad_glProgramUniform2uiv = (PFNGLPROGRAMUNIFORM2UIVPROC) load(userptr, "glProgramUniform2uiv");
+    glad_glProgramUniform3f = (PFNGLPROGRAMUNIFORM3FPROC) load(userptr, "glProgramUniform3f");
+    glad_glProgramUniform3fv = (PFNGLPROGRAMUNIFORM3FVPROC) load(userptr, "glProgramUniform3fv");
+    glad_glProgramUniform3i = (PFNGLPROGRAMUNIFORM3IPROC) load(userptr, "glProgramUniform3i");
+    glad_glProgramUniform3iv = (PFNGLPROGRAMUNIFORM3IVPROC) load(userptr, "glProgramUniform3iv");
+    glad_glProgramUniform3ui = (PFNGLPROGRAMUNIFORM3UIPROC) load(userptr, "glProgramUniform3ui");
+    glad_glProgramUniform3uiv = (PFNGLPROGRAMUNIFORM3UIVPROC) load(userptr, "glProgramUniform3uiv");
+    glad_glProgramUniform4f = (PFNGLPROGRAMUNIFORM4FPROC) load(userptr, "glProgramUniform4f");
+    glad_glProgramUniform4fv = (PFNGLPROGRAMUNIFORM4FVPROC) load(userptr, "glProgramUniform4fv");
+    glad_glProgramUniform4i = (PFNGLPROGRAMUNIFORM4IPROC) load(userptr, "glProgramUniform4i");
+    glad_glProgramUniform4iv = (PFNGLPROGRAMUNIFORM4IVPROC) load(userptr, "glProgramUniform4iv");
+    glad_glProgramUniform4ui = (PFNGLPROGRAMUNIFORM4UIPROC) load(userptr, "glProgramUniform4ui");
+    glad_glProgramUniform4uiv = (PFNGLPROGRAMUNIFORM4UIVPROC) load(userptr, "glProgramUniform4uiv");
+    glad_glProgramUniformMatrix2fv = (PFNGLPROGRAMUNIFORMMATRIX2FVPROC) load(userptr, "glProgramUniformMatrix2fv");
+    glad_glProgramUniformMatrix2x3fv = (PFNGLPROGRAMUNIFORMMATRIX2X3FVPROC) load(userptr, "glProgramUniformMatrix2x3fv");
+    glad_glProgramUniformMatrix2x4fv = (PFNGLPROGRAMUNIFORMMATRIX2X4FVPROC) load(userptr, "glProgramUniformMatrix2x4fv");
+    glad_glProgramUniformMatrix3fv = (PFNGLPROGRAMUNIFORMMATRIX3FVPROC) load(userptr, "glProgramUniformMatrix3fv");
+    glad_glProgramUniformMatrix3x2fv = (PFNGLPROGRAMUNIFORMMATRIX3X2FVPROC) load(userptr, "glProgramUniformMatrix3x2fv");
+    glad_glProgramUniformMatrix3x4fv = (PFNGLPROGRAMUNIFORMMATRIX3X4FVPROC) load(userptr, "glProgramUniformMatrix3x4fv");
+    glad_glProgramUniformMatrix4fv = (PFNGLPROGRAMUNIFORMMATRIX4FVPROC) load(userptr, "glProgramUniformMatrix4fv");
+    glad_glProgramUniformMatrix4x2fv = (PFNGLPROGRAMUNIFORMMATRIX4X2FVPROC) load(userptr, "glProgramUniformMatrix4x2fv");
+    glad_glProgramUniformMatrix4x3fv = (PFNGLPROGRAMUNIFORMMATRIX4X3FVPROC) load(userptr, "glProgramUniformMatrix4x3fv");
+    glad_glSampleMaski = (PFNGLSAMPLEMASKIPROC) load(userptr, "glSampleMaski");
+    glad_glTexStorage2DMultisample = (PFNGLTEXSTORAGE2DMULTISAMPLEPROC) load(userptr, "glTexStorage2DMultisample");
+    glad_glUseProgramStages = (PFNGLUSEPROGRAMSTAGESPROC) load(userptr, "glUseProgramStages");
+    glad_glValidateProgramPipeline = (PFNGLVALIDATEPROGRAMPIPELINEPROC) load(userptr, "glValidateProgramPipeline");
+    glad_glVertexAttribBinding = (PFNGLVERTEXATTRIBBINDINGPROC) load(userptr, "glVertexAttribBinding");
+    glad_glVertexAttribFormat = (PFNGLVERTEXATTRIBFORMATPROC) load(userptr, "glVertexAttribFormat");
+    glad_glVertexAttribIFormat = (PFNGLVERTEXATTRIBIFORMATPROC) load(userptr, "glVertexAttribIFormat");
+    glad_glVertexBindingDivisor = (PFNGLVERTEXBINDINGDIVISORPROC) load(userptr, "glVertexBindingDivisor");
+}
+static void glad_gl_load_GL_ES_VERSION_3_2( GLADuserptrloadfunc load, void* userptr) { /* fills the glad_gl* pointers below via load(userptr, name) */
+    if(!GLAD_GL_ES_VERSION_3_2) return; /* flag is zero: leave every pointer untouched */
+    glad_glBlendBarrier = (PFNGLBLENDBARRIERPROC) load(userptr, "glBlendBarrier");
+    glad_glBlendEquationSeparatei = (PFNGLBLENDEQUATIONSEPARATEIPROC) load(userptr, "glBlendEquationSeparatei");
+    glad_glBlendEquationi = (PFNGLBLENDEQUATIONIPROC) load(userptr, "glBlendEquationi");
+    glad_glBlendFuncSeparatei = (PFNGLBLENDFUNCSEPARATEIPROC) load(userptr, "glBlendFuncSeparatei");
+    glad_glBlendFunci = (PFNGLBLENDFUNCIPROC) load(userptr, "glBlendFunci");
+    glad_glColorMaski = (PFNGLCOLORMASKIPROC) load(userptr, "glColorMaski");
+    glad_glCopyImageSubData = (PFNGLCOPYIMAGESUBDATAPROC) load(userptr, "glCopyImageSubData");
+    glad_glDebugMessageCallback = (PFNGLDEBUGMESSAGECALLBACKPROC) load(userptr, "glDebugMessageCallback");
+    glad_glDebugMessageControl = (PFNGLDEBUGMESSAGECONTROLPROC) load(userptr, "glDebugMessageControl");
+    glad_glDebugMessageInsert = (PFNGLDEBUGMESSAGEINSERTPROC) load(userptr, "glDebugMessageInsert");
+    glad_glDisablei = (PFNGLDISABLEIPROC) load(userptr, "glDisablei");
+    glad_glDrawElementsBaseVertex = (PFNGLDRAWELEMENTSBASEVERTEXPROC) load(userptr, "glDrawElementsBaseVertex");
+    glad_glDrawElementsInstancedBaseVertex = (PFNGLDRAWELEMENTSINSTANCEDBASEVERTEXPROC) load(userptr, "glDrawElementsInstancedBaseVertex");
+    glad_glDrawRangeElementsBaseVertex = (PFNGLDRAWRANGEELEMENTSBASEVERTEXPROC) load(userptr, "glDrawRangeElementsBaseVertex");
+    glad_glEnablei = (PFNGLENABLEIPROC) load(userptr, "glEnablei");
+    glad_glFramebufferTexture = (PFNGLFRAMEBUFFERTEXTUREPROC) load(userptr, "glFramebufferTexture");
+    glad_glGetDebugMessageLog = (PFNGLGETDEBUGMESSAGELOGPROC) load(userptr, "glGetDebugMessageLog");
+    glad_glGetGraphicsResetStatus = (PFNGLGETGRAPHICSRESETSTATUSPROC) load(userptr, "glGetGraphicsResetStatus");
+    glad_glGetObjectLabel = (PFNGLGETOBJECTLABELPROC) load(userptr, "glGetObjectLabel");
+    glad_glGetObjectPtrLabel = (PFNGLGETOBJECTPTRLABELPROC) load(userptr, "glGetObjectPtrLabel");
+    glad_glGetPointerv = (PFNGLGETPOINTERVPROC) load(userptr, "glGetPointerv");
+    glad_glGetSamplerParameterIiv = (PFNGLGETSAMPLERPARAMETERIIVPROC) load(userptr, "glGetSamplerParameterIiv");
+    glad_glGetSamplerParameterIuiv = (PFNGLGETSAMPLERPARAMETERIUIVPROC) load(userptr, "glGetSamplerParameterIuiv");
+    glad_glGetTexParameterIiv = (PFNGLGETTEXPARAMETERIIVPROC) load(userptr, "glGetTexParameterIiv");
+    glad_glGetTexParameterIuiv = (PFNGLGETTEXPARAMETERIUIVPROC) load(userptr, "glGetTexParameterIuiv");
+    glad_glGetnUniformfv = (PFNGLGETNUNIFORMFVPROC) load(userptr, "glGetnUniformfv");
+    glad_glGetnUniformiv = (PFNGLGETNUNIFORMIVPROC) load(userptr, "glGetnUniformiv");
+    glad_glGetnUniformuiv = (PFNGLGETNUNIFORMUIVPROC) load(userptr, "glGetnUniformuiv");
+    glad_glIsEnabledi = (PFNGLISENABLEDIPROC) load(userptr, "glIsEnabledi");
+    glad_glMinSampleShading = (PFNGLMINSAMPLESHADINGPROC) load(userptr, "glMinSampleShading");
+    glad_glObjectLabel = (PFNGLOBJECTLABELPROC) load(userptr, "glObjectLabel");
+    glad_glObjectPtrLabel = (PFNGLOBJECTPTRLABELPROC) load(userptr, "glObjectPtrLabel");
+    glad_glPatchParameteri = (PFNGLPATCHPARAMETERIPROC) load(userptr, "glPatchParameteri");
+    glad_glPopDebugGroup = (PFNGLPOPDEBUGGROUPPROC) load(userptr, "glPopDebugGroup");
+    glad_glPrimitiveBoundingBox = (PFNGLPRIMITIVEBOUNDINGBOXPROC) load(userptr, "glPrimitiveBoundingBox");
+    glad_glPushDebugGroup = (PFNGLPUSHDEBUGGROUPPROC) load(userptr, "glPushDebugGroup");
+    glad_glReadnPixels = (PFNGLREADNPIXELSPROC) load(userptr, "glReadnPixels");
+    glad_glSamplerParameterIiv = (PFNGLSAMPLERPARAMETERIIVPROC) load(userptr, "glSamplerParameterIiv");
+    glad_glSamplerParameterIuiv = (PFNGLSAMPLERPARAMETERIUIVPROC) load(userptr, "glSamplerParameterIuiv");
+    glad_glTexBuffer = (PFNGLTEXBUFFERPROC) load(userptr, "glTexBuffer");
+    glad_glTexBufferRange = (PFNGLTEXBUFFERRANGEPROC) load(userptr, "glTexBufferRange");
+    glad_glTexParameterIiv = (PFNGLTEXPARAMETERIIVPROC) load(userptr, "glTexParameterIiv");
+    glad_glTexParameterIuiv = (PFNGLTEXPARAMETERIUIVPROC) load(userptr, "glTexParameterIuiv");
+    glad_glTexStorage3DMultisample = (PFNGLTEXSTORAGE3DMULTISAMPLEPROC) load(userptr, "glTexStorage3DMultisample");
+}
 static void glad_gl_load_GL_ARB_debug_output( GLADuserptrloadfunc load, void* userptr) {
     if(!GLAD_GL_ARB_debug_output) return;
     glad_glDebugMessageCallbackARB = (PFNGLDEBUGMESSAGECALLBACKARBPROC) load(userptr, "glDebugMessageCallbackARB");
@@ -1831,6 +2310,78 @@ int gladLoadGL( GLADloadfunc load) {
     return gladLoadGLUserPtr( glad_gl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load);
 }
 
+static int glad_gl_find_extensions_gles2( int version) {
+    /* Fetch the extension data via glad_gl_get_extensions() and set the two OVR multiview flags from it. */
+    const char *ext_string = NULL;
+    char **ext_list = NULL;
+    unsigned int ext_count = 0;
+    if (glad_gl_get_extensions(version, &ext_string, &ext_count, &ext_list) == 0) {
+        return 0; /* extension data unavailable: flags are left as they were */
+    }
+    GLAD_GL_OVR_multiview = glad_gl_has_extension(version, ext_string, ext_count, ext_list, "GL_OVR_multiview");
+    GLAD_GL_OVR_multiview2 = glad_gl_has_extension(version, ext_string, ext_count, ext_list, "GL_OVR_multiview2");
+    glad_gl_free_extensions(ext_list, ext_count);
+    return 1;
+}
+
+static int glad_gl_find_core_gles2(void) { /* parses glGetString(GL_VERSION), sets the GLAD_GL_ES_VERSION_* flags, returns GLAD_MAKE_VERSION(major, minor) */
+    int i;
+    const char* version;
+    const char* prefixes[] = {
+        "OpenGL ES-CM ",
+        "OpenGL ES-CL ",
+        "OpenGL ES ",
+        "OpenGL SC ",
+        NULL
+    };
+    int major = 0;
+    int minor = 0;
+    version = (const char*) glad_glGetString(GL_VERSION);
+    if (!version) return 0; /* no version string available */
+    for (i = 0;  prefixes[i];  i++) {
+        const size_t length = strlen(prefixes[i]);
+        if (strncmp(version, prefixes[i], length) == 0) {
+            version += length; /* step past the first prefix that matches */
+            break;
+        }
+    }
+    /* The scan result is not checked: major/minor keep their 0 defaults for any field that is not parsed. */
+    GLAD_IMPL_UTIL_SSCANF(version, "%d.%d", &major, &minor);
+    /* Each flag is true when the parsed version is at least the version named by the flag. */
+    GLAD_GL_ES_VERSION_2_0 = (major == 2 && minor >= 0) || major > 2;
+    GLAD_GL_ES_VERSION_3_0 = (major == 3 && minor >= 0) || major > 3;
+    GLAD_GL_ES_VERSION_3_1 = (major == 3 && minor >= 1) || major > 3;
+    GLAD_GL_ES_VERSION_3_2 = (major == 3 && minor >= 2) || major > 3;
+
+    return GLAD_MAKE_VERSION(major, minor);
+}
+
+int gladLoadGLES2UserPtr( GLADuserptrloadfunc load, void *userptr) {
+    int version;
+    /* glGetString is resolved first because the version is parsed from glGetString(GL_VERSION); returns 0 if either is NULL. */
+    glad_glGetString = (PFNGLGETSTRINGPROC) load(userptr, "glGetString");
+    if(glad_glGetString == NULL) return 0;
+    if(glad_glGetString(GL_VERSION) == NULL) return 0;
+    version = glad_gl_find_core_gles2();
+    /* glad_gl_find_core_gles2() has set the GLAD_GL_ES_VERSION_* flags; the ES 3.1 and 3.2 loaders return early when theirs is zero. */
+    glad_gl_load_GL_ES_VERSION_2_0(load, userptr);
+    glad_gl_load_GL_ES_VERSION_3_0(load, userptr);
+    glad_gl_load_GL_ES_VERSION_3_1(load, userptr);
+    glad_gl_load_GL_ES_VERSION_3_2(load, userptr);
+    /* A failed extension lookup makes the whole call report 0, even though core pointers were already assigned above. */
+    if (!glad_gl_find_extensions_gles2(version)) return 0;
+    glad_gl_load_GL_OVR_multiview(load, userptr);
+
+
+
+    return version; /* value computed by glad_gl_find_core_gles2() */
+}
+
+
+int gladLoadGLES2( GLADloadfunc load) { /* convenience wrapper: passes `load` itself as the userptr of gladLoadGLES2UserPtr */
+    return gladLoadGLES2UserPtr( glad_gl_get_proc_from_userptr, GLAD_GNUC_EXTENSION (void*) load);
+}
+
 
 
  
@@ -1924,7 +2475,7 @@ static GLADapiproc glad_gl_get_proc(void *vuserptr, const char *name) {
     return result;
 }
 
-static void* _glad_GL_loader_handle = NULL;
+static void* _glad_gles_loader_handle = NULL;
 
 static void* glad_gl_dlopen_handle(void) {
 #if GLAD_PLATFORM_APPLE
@@ -1946,11 +2497,11 @@ static void* glad_gl_dlopen_handle(void) {
     };
 #endif
 
-    if (_glad_GL_loader_handle == NULL) {
-        _glad_GL_loader_handle = glad_get_dlopen_handle(NAMES, sizeof(NAMES) / sizeof(NAMES[0]));
+    if (_glad_gles_loader_handle == NULL) {
+        _glad_gles_loader_handle = glad_get_dlopen_handle(NAMES, sizeof(NAMES) / sizeof(NAMES[0]));
     }
 
-    return _glad_GL_loader_handle;
+    return _glad_gles_loader_handle;
 }
 
 static struct _glad_gl_userptr glad_gl_build_userptr(void *handle) {
@@ -1976,7 +2527,7 @@ int gladLoaderLoadGL(void) {
     int did_load = 0;
     struct _glad_gl_userptr userptr;
 
-    did_load = _glad_GL_loader_handle == NULL;
+    did_load = _glad_gles_loader_handle == NULL;
     handle = glad_gl_dlopen_handle();
     if (handle) {
         userptr = glad_gl_build_userptr(handle);
@@ -1994,13 +2545,199 @@ int gladLoaderLoadGL(void) {
 
 
 void gladLoaderUnloadGL(void) {
+    if (_glad_gles_loader_handle != NULL) {
+        glad_close_dlopen_handle(_glad_gles_loader_handle);
+        _glad_gles_loader_handle = NULL;
+    }
+}
+
+#endif /* GLAD_GL */
+#ifdef GLAD_GLES2
+
+#ifndef GLAD_LOADER_LIBRARY_C_
+#define GLAD_LOADER_LIBRARY_C_
+
+#include <stddef.h>
+#include <stdlib.h>
+
+#if GLAD_PLATFORM_WIN32
+#include <windows.h>
+#else
+#include <dlfcn.h>
+#endif
+
+
+static void* glad_get_dlopen_handle(const char *lib_names[], int length) {
+    void *handle = NULL;
+    int i;
+    /* Try the `length` candidate names in order and return the first library handle that opens. */
+    for (i = 0; i < length; ++i) {
+#if GLAD_PLATFORM_WIN32
+  #if GLAD_PLATFORM_UWP
+        size_t buffer_size = (strlen(lib_names[i]) + 1) * sizeof(WCHAR); /* size in bytes */
+        LPWSTR buffer = (LPWSTR) malloc(buffer_size);
+        if (buffer != NULL) {
+            int ret = MultiByteToWideChar(CP_ACP, 0, lib_names[i], -1, buffer, (int) (buffer_size / sizeof(WCHAR))); /* capacity is counted in WCHARs, not bytes */
+            if (ret != 0) {
+                handle = (void*) LoadPackagedLibrary(buffer, 0);
+            }
+            free((void*) buffer);
+        }
+  #else
+        handle = (void*) LoadLibraryA(lib_names[i]);
+  #endif
+#else
+        handle = dlopen(lib_names[i], RTLD_LAZY | RTLD_LOCAL);
+#endif
+        if (handle != NULL) {
+            return handle;
+        }
+    }
+    /* none of the candidate names could be opened */
+    return NULL;
+}
+
+static void glad_close_dlopen_handle(void* handle) {
+    if (handle == NULL) return; /* nothing to release */
+#if GLAD_PLATFORM_WIN32
+    FreeLibrary((HMODULE) handle);
+#else
+    dlclose(handle);
+#endif
+    /* the result of FreeLibrary/dlclose is not inspected */
+}
+
+static GLADapiproc glad_dlsym_handle(void* handle, const char *name) {
+    GLADapiproc symbol = NULL; /* stays NULL when no library handle is given */
+    if (handle != NULL) {
+#if GLAD_PLATFORM_WIN32
+        symbol = (GLADapiproc) GetProcAddress((HMODULE) handle, name);
+#else
+        symbol = GLAD_GNUC_EXTENSION (GLADapiproc) dlsym(handle, name);
+#endif
+    }
+    return symbol;
+}
+
+#endif /* GLAD_LOADER_LIBRARY_C_ */
+
+#if GLAD_PLATFORM_EMSCRIPTEN
+#ifndef GLAD_EGL_H_
+  typedef void (*__eglMustCastToProperFunctionPointerType)(void);
+  typedef __eglMustCastToProperFunctionPointerType (GLAD_API_PTR *PFNEGLGETPROCADDRESSPROC)(const char *name);
+#endif
+  extern __eglMustCastToProperFunctionPointerType emscripten_GetProcAddress(const char *name);
+#elif EGL_STATIC
+  typedef void (*__eglMustCastToProperFunctionPointerType)(void);
+  typedef __eglMustCastToProperFunctionPointerType (GLAD_API_PTR *PFNEGLGETPROCADDRESSPROC)(const char *name);
+  extern __eglMustCastToProperFunctionPointerType GLAD_API_PTR eglGetProcAddress(const char *name);
+#else
+  #include <glad/egl.h>
+#endif
+
+
+struct _glad_gles2_userptr { /* lookup context handed to glad_gles2_get_proc as its userptr */
+    void *handle; /* library handle searched first with glad_dlsym_handle (non-Emscripten builds) */
+    PFNEGLGETPROCADDRESSPROC get_proc_address_ptr; /* resolver called when the library lookup gives NULL */
+};
+
+
+static GLADapiproc glad_gles2_get_proc(void *vuserptr, const char* name) {
+    /* Look `name` up in the library handle first (skipped on Emscripten), then through get_proc_address_ptr. */
+    const struct _glad_gles2_userptr *ctx = (const struct _glad_gles2_userptr*) vuserptr;
+    GLADapiproc proc = NULL;
+#if GLAD_PLATFORM_EMSCRIPTEN
+    GLAD_UNUSED(glad_dlsym_handle);
+#else
+    proc = glad_dlsym_handle(ctx->handle, name);
+#endif
+    if (proc != NULL) {
+        return proc;
+    }
+    /* the library lookup produced nothing: ask the resolver callback instead */
+    return ctx->get_proc_address_ptr(name);
+}
+
+static void* _glad_GL_loader_handle = NULL; /* despite the "GL" in its name, caches the GLES2 library opened by glad_gles2_dlopen_handle() */
+
+static void* glad_gles2_dlopen_handle(void) { /* returns the cached GLES2 library handle, opening it on first use; always NULL on Emscripten */
+#if GLAD_PLATFORM_EMSCRIPTEN
+#elif GLAD_PLATFORM_APPLE
+    static const char *NAMES[] = {"libGLESv2.dylib"};
+#elif GLAD_PLATFORM_WIN32
+    static const char *NAMES[] = {"GLESv2.dll", "libGLESv2.dll"};
+#else
+    static const char *NAMES[] = {"libGLESv2.so.2", "libGLESv2.so"};
+#endif
+    /* NAMES holds the per-platform candidate file names, tried in the listed order; none is declared for Emscripten. */
+#if GLAD_PLATFORM_EMSCRIPTEN
+    GLAD_UNUSED(glad_get_dlopen_handle);
+    return NULL;
+#else
+    if (_glad_GL_loader_handle == NULL) {
+        _glad_GL_loader_handle = glad_get_dlopen_handle(NAMES, sizeof(NAMES) / sizeof(NAMES[0]));
+    }
+    /* still NULL here if no candidate could be opened; the next call will try again */
+    return _glad_GL_loader_handle;
+#endif
+}
+
+static struct _glad_gles2_userptr glad_gles2_build_userptr(void *handle) { /* bundles the library handle with the platform's proc-address resolver */
+    struct _glad_gles2_userptr userptr;
+#if GLAD_PLATFORM_EMSCRIPTEN
+    GLAD_UNUSED(handle); /* userptr.handle is not assigned in this branch */
+    userptr.get_proc_address_ptr = emscripten_GetProcAddress;
+#else
+    userptr.handle = handle;
+    userptr.get_proc_address_ptr = eglGetProcAddress;
+#endif
+    return userptr; /* returned by value */
+}
+
+int gladLoaderLoadGLES2(void) {
+    int version = 0;
+    void *handle = NULL;
+    int did_load = 0;
+    struct _glad_gles2_userptr userptr;
+    /* Returns the result of gladLoadGLES2UserPtr(), or 0 when eglGetProcAddress is NULL or no GLES2 library opens. */
+#if GLAD_PLATFORM_EMSCRIPTEN
+    GLAD_UNUSED(handle);
+    GLAD_UNUSED(did_load);
+    GLAD_UNUSED(glad_gles2_dlopen_handle);
+    GLAD_UNUSED(glad_gles2_build_userptr);
+    userptr.get_proc_address_ptr = emscripten_GetProcAddress; /* note: userptr.handle is left unset on this path */
+    version = gladLoadGLES2UserPtr(glad_gles2_get_proc, &userptr);
+#else
+    if (eglGetProcAddress == NULL) {
+        return 0;
+    }
+    /* did_load records whether this call is the one that opens the library (no cached handle yet). */
+    did_load = _glad_GL_loader_handle == NULL;
+    handle = glad_gles2_dlopen_handle();
+    if (handle != NULL) {
+        userptr = glad_gles2_build_userptr(handle);
+
+        version = gladLoadGLES2UserPtr(glad_gles2_get_proc, &userptr);
+        /* On failure, close the library only if this call opened it. */
+        if (!version && did_load) {
+            gladLoaderUnloadGLES2();
+        }
+    }
+#endif
+
+    return version;
+}
+
+
+
+void gladLoaderUnloadGLES2(void) { /* closes the cached GLES2 library handle, if any, and clears it; the loaded function pointers are not reset here */
     if (_glad_GL_loader_handle != NULL) {
         glad_close_dlopen_handle(_glad_GL_loader_handle);
         _glad_GL_loader_handle = NULL;
     }
 }
 
-#endif /* GLAD_GL */
+#endif /* GLAD_GLES2 */
 
 #ifdef __cplusplus
 }
diff --git a/thirdparty/glad/glad/egl.h b/thirdparty/glad/glad/egl.h
new file mode 100644
index 0000000000..1bf35c1404
--- /dev/null
+++ b/thirdparty/glad/glad/egl.h
@@ -0,0 +1,562 @@
+/**
+ * Loader generated by glad 2.0.3 on Fri Feb  3 07:06:48 2023
+ *
+ * SPDX-License-Identifier: (WTFPL OR CC0-1.0) AND Apache-2.0
+ *
+ * Generator: C/C++
+ * Specification: egl
+ * Extensions: 3
+ *
+ * APIs:
+ *  - egl=1.5
+ *
+ * Options:
+ *  - ALIAS = False
+ *  - DEBUG = False
+ *  - HEADER_ONLY = False
+ *  - LOADER = True
+ *  - MX = False
+ *  - ON_DEMAND = False
+ *
+ * Commandline:
+ *    --api='egl=1.5' --extensions='EGL_ANDROID_blob_cache,EGL_KHR_platform_wayland,EGL_KHR_platform_x11' c --loader
+ *
+ * Online:
+ *    http://glad.sh/#api=egl%3D1.5&extensions=EGL_ANDROID_blob_cache%2CEGL_KHR_platform_wayland%2CEGL_KHR_platform_x11&generator=c&options=LOADER
+ *
+ */
+
+#ifndef GLAD_EGL_H_
+#define GLAD_EGL_H_
+
+
+#define GLAD_EGL
+#define GLAD_OPTION_EGL_LOADER
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef GLAD_PLATFORM_H_
+#define GLAD_PLATFORM_H_
+
+#ifndef GLAD_PLATFORM_WIN32
+  #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__)
+    #define GLAD_PLATFORM_WIN32 1
+  #else
+    #define GLAD_PLATFORM_WIN32 0
+  #endif
+#endif
+
+#ifndef GLAD_PLATFORM_APPLE
+  #ifdef __APPLE__
+    #define GLAD_PLATFORM_APPLE 1
+  #else
+    #define GLAD_PLATFORM_APPLE 0
+  #endif
+#endif
+
+#ifndef GLAD_PLATFORM_EMSCRIPTEN
+  #ifdef __EMSCRIPTEN__
+    #define GLAD_PLATFORM_EMSCRIPTEN 1
+  #else
+    #define GLAD_PLATFORM_EMSCRIPTEN 0
+  #endif
+#endif
+
+#ifndef GLAD_PLATFORM_UWP
+  #if defined(_MSC_VER) && !defined(GLAD_INTERNAL_HAVE_WINAPIFAMILY)
+    #ifdef __has_include
+      #if __has_include(<winapifamily.h>)
+        #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1
+      #endif
+    #elif _MSC_VER >= 1700 && !_USING_V110_SDK71_
+      #define GLAD_INTERNAL_HAVE_WINAPIFAMILY 1
+    #endif
+  #endif
+
+  #ifdef GLAD_INTERNAL_HAVE_WINAPIFAMILY
+    #include <winapifamily.h>
+    #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) && WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
+      #define GLAD_PLATFORM_UWP 1
+    #endif
+  #endif
+
+  #ifndef GLAD_PLATFORM_UWP
+    #define GLAD_PLATFORM_UWP 0
+  #endif
+#endif
+
+#ifdef __GNUC__
+  #define GLAD_GNUC_EXTENSION __extension__
+#else
+  #define GLAD_GNUC_EXTENSION
+#endif
+
+#define GLAD_UNUSED(x) (void)(x)
+
+#ifndef GLAD_API_CALL
+  #if defined(GLAD_API_CALL_EXPORT)
+    #if GLAD_PLATFORM_WIN32 || defined(__CYGWIN__)
+      #if defined(GLAD_API_CALL_EXPORT_BUILD)
+        #if defined(__GNUC__)
+          #define GLAD_API_CALL __attribute__ ((dllexport)) extern
+        #else
+          #define GLAD_API_CALL __declspec(dllexport) extern
+        #endif
+      #else
+        #if defined(__GNUC__)
+          #define GLAD_API_CALL __attribute__ ((dllimport)) extern
+        #else
+          #define GLAD_API_CALL __declspec(dllimport) extern
+        #endif
+      #endif
+    #elif defined(__GNUC__) && defined(GLAD_API_CALL_EXPORT_BUILD)
+      #define GLAD_API_CALL __attribute__ ((visibility ("default"))) extern
+    #else
+      #define GLAD_API_CALL extern
+    #endif
+  #else
+    #define GLAD_API_CALL extern
+  #endif
+#endif
+
+#ifdef APIENTRY
+  #define GLAD_API_PTR APIENTRY
+#elif GLAD_PLATFORM_WIN32
+  #define GLAD_API_PTR __stdcall
+#else
+  #define GLAD_API_PTR
+#endif
+
+#ifndef GLAPI
+#define GLAPI GLAD_API_CALL
+#endif
+
+#ifndef GLAPIENTRY
+#define GLAPIENTRY GLAD_API_PTR
+#endif
+
+#define GLAD_MAKE_VERSION(major, minor) ((major) * 10000 + (minor)) /* packs a version as major * 10000 + minor; arguments parenthesized so expressions expand correctly */
+#define GLAD_VERSION_MAJOR(version) ((version) / 10000) /* major part of a GLAD_MAKE_VERSION value */
+#define GLAD_VERSION_MINOR(version) ((version) % 10000) /* minor part of a GLAD_MAKE_VERSION value */
+
+#define GLAD_GENERATOR_VERSION "2.0.3"
+
+typedef void (*GLADapiproc)(void);
+
+typedef GLADapiproc (*GLADloadfunc)(const char *name);
+typedef GLADapiproc (*GLADuserptrloadfunc)(void *userptr, const char *name);
+
+typedef void (*GLADprecallback)(const char *name, GLADapiproc apiproc, int len_args, ...);
+typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apiproc, int len_args, ...);
+
+#endif /* GLAD_PLATFORM_H_ */
+
+#define EGL_ALPHA_FORMAT 0x3088
+#define EGL_ALPHA_FORMAT_NONPRE 0x308B
+#define EGL_ALPHA_FORMAT_PRE 0x308C
+#define EGL_ALPHA_MASK_SIZE 0x303E
+#define EGL_ALPHA_SIZE 0x3021
+#define EGL_BACK_BUFFER 0x3084
+#define EGL_BAD_ACCESS 0x3002
+#define EGL_BAD_ALLOC 0x3003
+#define EGL_BAD_ATTRIBUTE 0x3004
+#define EGL_BAD_CONFIG 0x3005
+#define EGL_BAD_CONTEXT 0x3006
+#define EGL_BAD_CURRENT_SURFACE 0x3007
+#define EGL_BAD_DISPLAY 0x3008
+#define EGL_BAD_MATCH 0x3009
+#define EGL_BAD_NATIVE_PIXMAP 0x300A
+#define EGL_BAD_NATIVE_WINDOW 0x300B
+#define EGL_BAD_PARAMETER 0x300C
+#define EGL_BAD_SURFACE 0x300D
+#define EGL_BIND_TO_TEXTURE_RGB 0x3039
+#define EGL_BIND_TO_TEXTURE_RGBA 0x303A
+#define EGL_BLUE_SIZE 0x3022
+#define EGL_BUFFER_DESTROYED 0x3095
+#define EGL_BUFFER_PRESERVED 0x3094
+#define EGL_BUFFER_SIZE 0x3020
+#define EGL_CLIENT_APIS 0x308D
+#define EGL_CL_EVENT_HANDLE 0x309C
+#define EGL_COLORSPACE 0x3087
+#define EGL_COLORSPACE_LINEAR 0x308A
+#define EGL_COLORSPACE_sRGB 0x3089
+#define EGL_COLOR_BUFFER_TYPE 0x303F
+#define EGL_CONDITION_SATISFIED 0x30F6
+#define EGL_CONFIG_CAVEAT 0x3027
+#define EGL_CONFIG_ID 0x3028
+#define EGL_CONFORMANT 0x3042
+#define EGL_CONTEXT_CLIENT_TYPE 0x3097
+#define EGL_CONTEXT_CLIENT_VERSION 0x3098
+#define EGL_CONTEXT_LOST 0x300E
+#define EGL_CONTEXT_MAJOR_VERSION 0x3098
+#define EGL_CONTEXT_MINOR_VERSION 0x30FB
+#define EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT 0x00000002
+#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT 0x00000001
+#define EGL_CONTEXT_OPENGL_DEBUG 0x31B0
+#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE 0x31B1
+#define EGL_CONTEXT_OPENGL_PROFILE_MASK 0x30FD
+#define EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY 0x31BD
+#define EGL_CONTEXT_OPENGL_ROBUST_ACCESS 0x31B2
+#define EGL_CORE_NATIVE_ENGINE 0x305B
+#define EGL_DEFAULT_DISPLAY EGL_CAST(EGLNativeDisplayType,0)
+#define EGL_DEPTH_SIZE 0x3025
+#define EGL_DISPLAY_SCALING 10000
+#define EGL_DONT_CARE EGL_CAST(EGLint,-1)
+#define EGL_DRAW 0x3059
+#define EGL_EXTENSIONS 0x3055
+#define EGL_FALSE 0
+#define EGL_FOREVER 0xFFFFFFFFFFFFFFFF
+#define EGL_GL_COLORSPACE 0x309D
+#define EGL_GL_COLORSPACE_LINEAR 0x308A
+#define EGL_GL_COLORSPACE_SRGB 0x3089
+#define EGL_GL_RENDERBUFFER 0x30B9
+#define EGL_GL_TEXTURE_2D 0x30B1
+#define EGL_GL_TEXTURE_3D 0x30B2
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x30B4
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x30B6
+#define EGL_GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x30B8
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x30B3
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x30B5
+#define EGL_GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x30B7
+#define EGL_GL_TEXTURE_LEVEL 0x30BC
+#define EGL_GL_TEXTURE_ZOFFSET 0x30BD
+#define EGL_GREEN_SIZE 0x3023
+#define EGL_HEIGHT 0x3056
+#define EGL_HORIZONTAL_RESOLUTION 0x3090
+#define EGL_IMAGE_PRESERVED 0x30D2
+#define EGL_LARGEST_PBUFFER 0x3058
+#define EGL_LEVEL 0x3029
+#define EGL_LOSE_CONTEXT_ON_RESET 0x31BF
+#define EGL_LUMINANCE_BUFFER 0x308F
+#define EGL_LUMINANCE_SIZE 0x303D
+#define EGL_MATCH_NATIVE_PIXMAP 0x3041
+#define EGL_MAX_PBUFFER_HEIGHT 0x302A
+#define EGL_MAX_PBUFFER_PIXELS 0x302B
+#define EGL_MAX_PBUFFER_WIDTH 0x302C
+#define EGL_MAX_SWAP_INTERVAL 0x303C
+#define EGL_MIN_SWAP_INTERVAL 0x303B
+#define EGL_MIPMAP_LEVEL 0x3083
+#define EGL_MIPMAP_TEXTURE 0x3082
+#define EGL_MULTISAMPLE_RESOLVE 0x3099
+#define EGL_MULTISAMPLE_RESOLVE_BOX 0x309B
+#define EGL_MULTISAMPLE_RESOLVE_BOX_BIT 0x0200
+#define EGL_MULTISAMPLE_RESOLVE_DEFAULT 0x309A
+#define EGL_NATIVE_RENDERABLE 0x302D
+#define EGL_NATIVE_VISUAL_ID 0x302E
+#define EGL_NATIVE_VISUAL_TYPE 0x302F
+#define EGL_NONE 0x3038
+#define EGL_NON_CONFORMANT_CONFIG 0x3051
+#define EGL_NOT_INITIALIZED 0x3001
+#define EGL_NO_CONTEXT EGL_CAST(EGLContext,0)
+#define EGL_NO_DISPLAY EGL_CAST(EGLDisplay,0)
+#define EGL_NO_IMAGE EGL_CAST(EGLImage,0)
+#define EGL_NO_RESET_NOTIFICATION 0x31BE
+#define EGL_NO_SURFACE EGL_CAST(EGLSurface,0)
+#define EGL_NO_SYNC EGL_CAST(EGLSync,0)
+#define EGL_NO_TEXTURE 0x305C
+#define EGL_OPENGL_API 0x30A2
+#define EGL_OPENGL_BIT 0x0008
+#define EGL_OPENGL_ES2_BIT 0x0004
+#define EGL_OPENGL_ES3_BIT 0x00000040
+#define EGL_OPENGL_ES_API 0x30A0
+#define EGL_OPENGL_ES_BIT 0x0001
+#define EGL_OPENVG_API 0x30A1
+#define EGL_OPENVG_BIT 0x0002
+#define EGL_OPENVG_IMAGE 0x3096
+#define EGL_PBUFFER_BIT 0x0001
+#define EGL_PIXEL_ASPECT_RATIO 0x3092
+#define EGL_PIXMAP_BIT 0x0002
+#define EGL_PLATFORM_WAYLAND_KHR 0x31D8
+#define EGL_PLATFORM_X11_KHR 0x31D5
+#define EGL_PLATFORM_X11_SCREEN_KHR 0x31D6
+#define EGL_READ 0x305A
+#define EGL_RED_SIZE 0x3024
+#define EGL_RENDERABLE_TYPE 0x3040
+#define EGL_RENDER_BUFFER 0x3086
+#define EGL_RGB_BUFFER 0x308E
+#define EGL_SAMPLES 0x3031
+#define EGL_SAMPLE_BUFFERS 0x3032
+#define EGL_SIGNALED 0x30F2
+#define EGL_SINGLE_BUFFER 0x3085
+#define EGL_SLOW_CONFIG 0x3050
+#define EGL_STENCIL_SIZE 0x3026
+#define EGL_SUCCESS 0x3000
+#define EGL_SURFACE_TYPE 0x3033
+#define EGL_SWAP_BEHAVIOR 0x3093
+#define EGL_SWAP_BEHAVIOR_PRESERVED_BIT 0x0400
+#define EGL_SYNC_CL_EVENT 0x30FE
+#define EGL_SYNC_CL_EVENT_COMPLETE 0x30FF
+#define EGL_SYNC_CONDITION 0x30F8
+#define EGL_SYNC_FENCE 0x30F9
+#define EGL_SYNC_FLUSH_COMMANDS_BIT 0x0001
+#define EGL_SYNC_PRIOR_COMMANDS_COMPLETE 0x30F0
+#define EGL_SYNC_STATUS 0x30F1
+#define EGL_SYNC_TYPE 0x30F7
+#define EGL_TEXTURE_2D 0x305F
+#define EGL_TEXTURE_FORMAT 0x3080
+#define EGL_TEXTURE_RGB 0x305D
+#define EGL_TEXTURE_RGBA 0x305E
+#define EGL_TEXTURE_TARGET 0x3081
+#define EGL_TIMEOUT_EXPIRED 0x30F5
+#define EGL_TRANSPARENT_BLUE_VALUE 0x3035
+#define EGL_TRANSPARENT_GREEN_VALUE 0x3036
+#define EGL_TRANSPARENT_RED_VALUE 0x3037
+#define EGL_TRANSPARENT_RGB 0x3052
+#define EGL_TRANSPARENT_TYPE 0x3034
+#define EGL_TRUE 1
+#define EGL_UNKNOWN EGL_CAST(EGLint,-1)
+#define EGL_UNSIGNALED 0x30F3
+#define EGL_VENDOR 0x3053
+#define EGL_VERSION 0x3054
+#define EGL_VERTICAL_RESOLUTION 0x3091
+#define EGL_VG_ALPHA_FORMAT 0x3088
+#define EGL_VG_ALPHA_FORMAT_NONPRE 0x308B
+#define EGL_VG_ALPHA_FORMAT_PRE 0x308C
+#define EGL_VG_ALPHA_FORMAT_PRE_BIT 0x0040
+#define EGL_VG_COLORSPACE 0x3087
+#define EGL_VG_COLORSPACE_LINEAR 0x308A
+#define EGL_VG_COLORSPACE_LINEAR_BIT 0x0020
+#define EGL_VG_COLORSPACE_sRGB 0x3089
+#define EGL_WIDTH 0x3057
+#define EGL_WINDOW_BIT 0x0004
+
+
+#include <KHR/khrplatform.h>
+#include <EGL/eglplatform.h>
+
+
+
+
+
+
+
+
+
+
+
+struct AHardwareBuffer;
+struct wl_buffer;
+struct wl_display;
+struct wl_resource;
+
+typedef unsigned int EGLBoolean;
+typedef unsigned int EGLenum;
+typedef intptr_t EGLAttribKHR;
+typedef intptr_t EGLAttrib;
+typedef void *EGLClientBuffer;
+typedef void *EGLConfig;
+typedef void *EGLContext;
+typedef void *EGLDeviceEXT;
+typedef void *EGLDisplay;
+typedef void *EGLImage;
+typedef void *EGLImageKHR;
+typedef void *EGLLabelKHR;
+typedef void *EGLObjectKHR;
+typedef void *EGLOutputLayerEXT;
+typedef void *EGLOutputPortEXT;
+typedef void *EGLStreamKHR;
+typedef void *EGLSurface;
+typedef void *EGLSync;
+typedef void *EGLSyncKHR;
+typedef void *EGLSyncNV;
+typedef void (*__eglMustCastToProperFunctionPointerType)(void);
+typedef khronos_utime_nanoseconds_t EGLTimeKHR;
+typedef khronos_utime_nanoseconds_t EGLTime;
+typedef khronos_utime_nanoseconds_t EGLTimeNV;
+typedef khronos_utime_nanoseconds_t EGLuint64NV;
+typedef khronos_uint64_t EGLuint64KHR;
+typedef khronos_stime_nanoseconds_t EGLnsecsANDROID;
+typedef int EGLNativeFileDescriptorKHR;
+typedef khronos_ssize_t EGLsizeiANDROID;
+typedef void (*EGLSetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, const void *value, EGLsizeiANDROID valueSize);
+typedef EGLsizeiANDROID (*EGLGetBlobFuncANDROID) (const void *key, EGLsizeiANDROID keySize, void *value, EGLsizeiANDROID valueSize);
+struct EGLClientPixmapHI {
+    void  *pData;
+    EGLint iWidth;
+    EGLint iHeight;
+    EGLint iStride;
+};
+typedef void (GLAD_API_PTR *EGLDEBUGPROCKHR)(EGLenum error,const char *command,EGLint messageType,EGLLabelKHR threadLabel,EGLLabelKHR objectLabel,const char* message);
+#define PFNEGLBINDWAYLANDDISPLAYWL PFNEGLBINDWAYLANDDISPLAYWLPROC
+#define PFNEGLUNBINDWAYLANDDISPLAYWL PFNEGLUNBINDWAYLANDDISPLAYWLPROC
+#define PFNEGLQUERYWAYLANDBUFFERWL PFNEGLQUERYWAYLANDBUFFERWLPROC
+#define PFNEGLCREATEWAYLANDBUFFERFROMIMAGEWL PFNEGLCREATEWAYLANDBUFFERFROMIMAGEWLPROC
+
+
+#define EGL_VERSION_1_0 1 /* Pattern for this list: a compile-time macro fixed at 1, paired with a GLAD_* int variable declaration (presumably filled in by the loader; confirm in egl.c). */
+GLAD_API_CALL int GLAD_EGL_VERSION_1_0;
+#define EGL_VERSION_1_1 1
+GLAD_API_CALL int GLAD_EGL_VERSION_1_1;
+#define EGL_VERSION_1_2 1
+GLAD_API_CALL int GLAD_EGL_VERSION_1_2;
+#define EGL_VERSION_1_3 1
+GLAD_API_CALL int GLAD_EGL_VERSION_1_3;
+#define EGL_VERSION_1_4 1
+GLAD_API_CALL int GLAD_EGL_VERSION_1_4;
+#define EGL_VERSION_1_5 1
+GLAD_API_CALL int GLAD_EGL_VERSION_1_5;
+#define EGL_ANDROID_blob_cache 1 /* Extensions follow the same macro + variable pattern as the core versions. */
+GLAD_API_CALL int GLAD_EGL_ANDROID_blob_cache;
+#define EGL_KHR_platform_wayland 1
+GLAD_API_CALL int GLAD_EGL_KHR_platform_wayland;
+#define EGL_KHR_platform_x11 1
+GLAD_API_CALL int GLAD_EGL_KHR_platform_x11;
+
+
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLBINDAPIPROC)(EGLenum api); /* Function-pointer types, one per EGL entry point, named PFN<UPPERCASE NAME>PROC and listed alphabetically. */
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLBINDTEXIMAGEPROC)(EGLDisplay dpy, EGLSurface surface, EGLint buffer);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLCHOOSECONFIGPROC)(EGLDisplay dpy, const EGLint * attrib_list, EGLConfig * configs, EGLint config_size, EGLint * num_config);
+typedef EGLint (GLAD_API_PTR *PFNEGLCLIENTWAITSYNCPROC)(EGLDisplay dpy, EGLSync sync, EGLint flags, EGLTime timeout);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLCOPYBUFFERSPROC)(EGLDisplay dpy, EGLSurface surface, EGLNativePixmapType target);
+typedef EGLContext (GLAD_API_PTR *PFNEGLCREATECONTEXTPROC)(EGLDisplay dpy, EGLConfig config, EGLContext share_context, const EGLint * attrib_list);
+typedef EGLImage (GLAD_API_PTR *PFNEGLCREATEIMAGEPROC)(EGLDisplay dpy, EGLContext ctx, EGLenum target, EGLClientBuffer buffer, const EGLAttrib * attrib_list); /* Note the attribute list type: EGLAttrib here, EGLint in most of the other signatures. */
+typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC)(EGLDisplay dpy, EGLenum buftype, EGLClientBuffer buffer, EGLConfig config, const EGLint * attrib_list);
+typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPBUFFERSURFACEPROC)(EGLDisplay dpy, EGLConfig config, const EGLint * attrib_list);
+typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPIXMAPSURFACEPROC)(EGLDisplay dpy, EGLConfig config, EGLNativePixmapType pixmap, const EGLint * attrib_list);
+typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC)(EGLDisplay dpy, EGLConfig config, void * native_pixmap, const EGLAttrib * attrib_list); /* The "Platform" variants take the native object as void* and an EGLAttrib list. */
+typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEPLATFORMWINDOWSURFACEPROC)(EGLDisplay dpy, EGLConfig config, void * native_window, const EGLAttrib * attrib_list);
+typedef EGLSync (GLAD_API_PTR *PFNEGLCREATESYNCPROC)(EGLDisplay dpy, EGLenum type, const EGLAttrib * attrib_list);
+typedef EGLSurface (GLAD_API_PTR *PFNEGLCREATEWINDOWSURFACEPROC)(EGLDisplay dpy, EGLConfig config, EGLNativeWindowType win, const EGLint * attrib_list);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYCONTEXTPROC)(EGLDisplay dpy, EGLContext ctx);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYIMAGEPROC)(EGLDisplay dpy, EGLImage image);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYSURFACEPROC)(EGLDisplay dpy, EGLSurface surface);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLDESTROYSYNCPROC)(EGLDisplay dpy, EGLSync sync);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLGETCONFIGATTRIBPROC)(EGLDisplay dpy, EGLConfig config, EGLint attribute, EGLint * value);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLGETCONFIGSPROC)(EGLDisplay dpy, EGLConfig * configs, EGLint config_size, EGLint * num_config);
+typedef EGLContext (GLAD_API_PTR *PFNEGLGETCURRENTCONTEXTPROC)(void);
+typedef EGLDisplay (GLAD_API_PTR *PFNEGLGETCURRENTDISPLAYPROC)(void);
+typedef EGLSurface (GLAD_API_PTR *PFNEGLGETCURRENTSURFACEPROC)(EGLint readdraw);
+typedef EGLDisplay (GLAD_API_PTR *PFNEGLGETDISPLAYPROC)(EGLNativeDisplayType display_id);
+typedef EGLint (GLAD_API_PTR *PFNEGLGETERRORPROC)(void);
+typedef EGLDisplay (GLAD_API_PTR *PFNEGLGETPLATFORMDISPLAYPROC)(EGLenum platform, void * native_display, const EGLAttrib * attrib_list);
+typedef __eglMustCastToProperFunctionPointerType (GLAD_API_PTR *PFNEGLGETPROCADDRESSPROC)(const char * procname); /* Returns the generic function-pointer type; callers cast the result to the matching PFN...PROC type. */
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLGETSYNCATTRIBPROC)(EGLDisplay dpy, EGLSync sync, EGLint attribute, EGLAttrib * value);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLINITIALIZEPROC)(EGLDisplay dpy, EGLint * major, EGLint * minor);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLMAKECURRENTPROC)(EGLDisplay dpy, EGLSurface draw, EGLSurface read, EGLContext ctx);
+typedef EGLenum (GLAD_API_PTR *PFNEGLQUERYAPIPROC)(void);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLQUERYCONTEXTPROC)(EGLDisplay dpy, EGLContext ctx, EGLint attribute, EGLint * value);
+typedef const char * (GLAD_API_PTR *PFNEGLQUERYSTRINGPROC)(EGLDisplay dpy, EGLint name);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLQUERYSURFACEPROC)(EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint * value);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLRELEASETEXIMAGEPROC)(EGLDisplay dpy, EGLSurface surface, EGLint buffer);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLRELEASETHREADPROC)(void);
+typedef void (GLAD_API_PTR *PFNEGLSETBLOBCACHEFUNCSANDROIDPROC)(EGLDisplay dpy, EGLSetBlobFuncANDROID set, EGLGetBlobFuncANDROID get); /* The only extension entry point in this list (EGL_ANDROID_blob_cache); takes the two blob callback types. */
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLSURFACEATTRIBPROC)(EGLDisplay dpy, EGLSurface surface, EGLint attribute, EGLint value);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLSWAPBUFFERSPROC)(EGLDisplay dpy, EGLSurface surface);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLSWAPINTERVALPROC)(EGLDisplay dpy, EGLint interval);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLTERMINATEPROC)(EGLDisplay dpy);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITCLIENTPROC)(void);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITGLPROC)(void);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITNATIVEPROC)(EGLint engine);
+typedef EGLBoolean (GLAD_API_PTR *PFNEGLWAITSYNCPROC)(EGLDisplay dpy, EGLSync sync, EGLint flags);
+
+GLAD_API_CALL PFNEGLBINDAPIPROC glad_eglBindAPI; /* Each entry point is declared as a function-pointer variable named glad_egl<Name>... */
+#define eglBindAPI glad_eglBindAPI /* ...and a macro redirects the usual egl<Name> spelling to that variable. */
+GLAD_API_CALL PFNEGLBINDTEXIMAGEPROC glad_eglBindTexImage;
+#define eglBindTexImage glad_eglBindTexImage
+GLAD_API_CALL PFNEGLCHOOSECONFIGPROC glad_eglChooseConfig;
+#define eglChooseConfig glad_eglChooseConfig
+GLAD_API_CALL PFNEGLCLIENTWAITSYNCPROC glad_eglClientWaitSync;
+#define eglClientWaitSync glad_eglClientWaitSync
+GLAD_API_CALL PFNEGLCOPYBUFFERSPROC glad_eglCopyBuffers;
+#define eglCopyBuffers glad_eglCopyBuffers
+GLAD_API_CALL PFNEGLCREATECONTEXTPROC glad_eglCreateContext;
+#define eglCreateContext glad_eglCreateContext
+GLAD_API_CALL PFNEGLCREATEIMAGEPROC glad_eglCreateImage;
+#define eglCreateImage glad_eglCreateImage
+GLAD_API_CALL PFNEGLCREATEPBUFFERFROMCLIENTBUFFERPROC glad_eglCreatePbufferFromClientBuffer;
+#define eglCreatePbufferFromClientBuffer glad_eglCreatePbufferFromClientBuffer
+GLAD_API_CALL PFNEGLCREATEPBUFFERSURFACEPROC glad_eglCreatePbufferSurface;
+#define eglCreatePbufferSurface glad_eglCreatePbufferSurface
+GLAD_API_CALL PFNEGLCREATEPIXMAPSURFACEPROC glad_eglCreatePixmapSurface;
+#define eglCreatePixmapSurface glad_eglCreatePixmapSurface
+GLAD_API_CALL PFNEGLCREATEPLATFORMPIXMAPSURFACEPROC glad_eglCreatePlatformPixmapSurface;
+#define eglCreatePlatformPixmapSurface glad_eglCreatePlatformPixmapSurface
+GLAD_API_CALL PFNEGLCREATEPLATFORMWINDOWSURFACEPROC glad_eglCreatePlatformWindowSurface;
+#define eglCreatePlatformWindowSurface glad_eglCreatePlatformWindowSurface
+GLAD_API_CALL PFNEGLCREATESYNCPROC glad_eglCreateSync;
+#define eglCreateSync glad_eglCreateSync
+GLAD_API_CALL PFNEGLCREATEWINDOWSURFACEPROC glad_eglCreateWindowSurface;
+#define eglCreateWindowSurface glad_eglCreateWindowSurface
+GLAD_API_CALL PFNEGLDESTROYCONTEXTPROC glad_eglDestroyContext;
+#define eglDestroyContext glad_eglDestroyContext
+GLAD_API_CALL PFNEGLDESTROYIMAGEPROC glad_eglDestroyImage;
+#define eglDestroyImage glad_eglDestroyImage
+GLAD_API_CALL PFNEGLDESTROYSURFACEPROC glad_eglDestroySurface;
+#define eglDestroySurface glad_eglDestroySurface
+GLAD_API_CALL PFNEGLDESTROYSYNCPROC glad_eglDestroySync;
+#define eglDestroySync glad_eglDestroySync
+GLAD_API_CALL PFNEGLGETCONFIGATTRIBPROC glad_eglGetConfigAttrib;
+#define eglGetConfigAttrib glad_eglGetConfigAttrib
+GLAD_API_CALL PFNEGLGETCONFIGSPROC glad_eglGetConfigs;
+#define eglGetConfigs glad_eglGetConfigs
+GLAD_API_CALL PFNEGLGETCURRENTCONTEXTPROC glad_eglGetCurrentContext;
+#define eglGetCurrentContext glad_eglGetCurrentContext
+GLAD_API_CALL PFNEGLGETCURRENTDISPLAYPROC glad_eglGetCurrentDisplay;
+#define eglGetCurrentDisplay glad_eglGetCurrentDisplay
+GLAD_API_CALL PFNEGLGETCURRENTSURFACEPROC glad_eglGetCurrentSurface;
+#define eglGetCurrentSurface glad_eglGetCurrentSurface
+GLAD_API_CALL PFNEGLGETDISPLAYPROC glad_eglGetDisplay;
+#define eglGetDisplay glad_eglGetDisplay
+GLAD_API_CALL PFNEGLGETERRORPROC glad_eglGetError;
+#define eglGetError glad_eglGetError
+GLAD_API_CALL PFNEGLGETPLATFORMDISPLAYPROC glad_eglGetPlatformDisplay;
+#define eglGetPlatformDisplay glad_eglGetPlatformDisplay
+GLAD_API_CALL PFNEGLGETPROCADDRESSPROC glad_eglGetProcAddress;
+#define eglGetProcAddress glad_eglGetProcAddress
+GLAD_API_CALL PFNEGLGETSYNCATTRIBPROC glad_eglGetSyncAttrib;
+#define eglGetSyncAttrib glad_eglGetSyncAttrib
+GLAD_API_CALL PFNEGLINITIALIZEPROC glad_eglInitialize;
+#define eglInitialize glad_eglInitialize
+GLAD_API_CALL PFNEGLMAKECURRENTPROC glad_eglMakeCurrent;
+#define eglMakeCurrent glad_eglMakeCurrent
+GLAD_API_CALL PFNEGLQUERYAPIPROC glad_eglQueryAPI;
+#define eglQueryAPI glad_eglQueryAPI
+GLAD_API_CALL PFNEGLQUERYCONTEXTPROC glad_eglQueryContext;
+#define eglQueryContext glad_eglQueryContext
+GLAD_API_CALL PFNEGLQUERYSTRINGPROC glad_eglQueryString;
+#define eglQueryString glad_eglQueryString
+GLAD_API_CALL PFNEGLQUERYSURFACEPROC glad_eglQuerySurface;
+#define eglQuerySurface glad_eglQuerySurface
+GLAD_API_CALL PFNEGLRELEASETEXIMAGEPROC glad_eglReleaseTexImage;
+#define eglReleaseTexImage glad_eglReleaseTexImage
+GLAD_API_CALL PFNEGLRELEASETHREADPROC glad_eglReleaseThread;
+#define eglReleaseThread glad_eglReleaseThread
+GLAD_API_CALL PFNEGLSETBLOBCACHEFUNCSANDROIDPROC glad_eglSetBlobCacheFuncsANDROID;
+#define eglSetBlobCacheFuncsANDROID glad_eglSetBlobCacheFuncsANDROID
+GLAD_API_CALL PFNEGLSURFACEATTRIBPROC glad_eglSurfaceAttrib;
+#define eglSurfaceAttrib glad_eglSurfaceAttrib
+GLAD_API_CALL PFNEGLSWAPBUFFERSPROC glad_eglSwapBuffers;
+#define eglSwapBuffers glad_eglSwapBuffers
+GLAD_API_CALL PFNEGLSWAPINTERVALPROC glad_eglSwapInterval;
+#define eglSwapInterval glad_eglSwapInterval
+GLAD_API_CALL PFNEGLTERMINATEPROC glad_eglTerminate;
+#define eglTerminate glad_eglTerminate
+GLAD_API_CALL PFNEGLWAITCLIENTPROC glad_eglWaitClient;
+#define eglWaitClient glad_eglWaitClient
+GLAD_API_CALL PFNEGLWAITGLPROC glad_eglWaitGL;
+#define eglWaitGL glad_eglWaitGL
+GLAD_API_CALL PFNEGLWAITNATIVEPROC glad_eglWaitNative;
+#define eglWaitNative glad_eglWaitNative
+GLAD_API_CALL PFNEGLWAITSYNCPROC glad_eglWaitSync;
+#define eglWaitSync glad_eglWaitSync
+
+
+
+
+
+GLAD_API_CALL int gladLoadEGLUserPtr(EGLDisplay display, GLADuserptrloadfunc load, void *userptr); /* Loader entry point taking a caller-supplied lookup callback plus an opaque userptr. NOTE(review): meaning of the int result is not stated here; confirm in egl.c. */
+GLAD_API_CALL int gladLoadEGL(EGLDisplay display, GLADloadfunc load); /* Same, for a lookup callback without a userptr argument. */
+
+#ifdef GLAD_EGL /* The two built-in loader functions are only declared when GLAD_EGL is defined. */
+
+GLAD_API_CALL int gladLoaderLoadEGL(EGLDisplay display); /* Takes no lookup callback, unlike the two functions above. */
+
+GLAD_API_CALL void gladLoaderUnloadEGL(void);
+
+#endif /* GLAD_EGL */
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/thirdparty/glad/glad/gl.h b/thirdparty/glad/glad/gl.h
index b09b5ff280..307ea4dbb8 100644
--- a/thirdparty/glad/glad/gl.h
+++ b/thirdparty/glad/glad/gl.h
@@ -9,6 +9,7 @@
  *
  * APIs:
  *  - gl:compatibility=3.3
+ *  - gles2=3.2
  *
  * Options:
  *  - ALIAS = False
@@ -19,10 +20,10 @@
  *  - ON_DEMAND = False
  *
  * Commandline:
- *    --api='gl:compatibility=3.3' --extensions='GL_ARB_debug_output,GL_ARB_framebuffer_object,GL_ARB_get_program_binary,GL_EXT_framebuffer_blit,GL_EXT_framebuffer_multisample,GL_EXT_framebuffer_object,GL_OVR_multiview,GL_OVR_multiview2' c --loader
+ *    --merge --api='gl:compatibility=3.3,gles2=3.2' --extensions='GL_ARB_debug_output,GL_ARB_framebuffer_object,GL_ARB_get_program_binary,GL_EXT_framebuffer_blit,GL_EXT_framebuffer_multisample,GL_EXT_framebuffer_object,GL_OVR_multiview,GL_OVR_multiview2' c --loader
  *
  * Online:
- *    http://glad.sh/#api=gl%3Acompatibility%3D3.3&extensions=GL_ARB_debug_output%2CGL_ARB_framebuffer_object%2CGL_ARB_get_program_binary%2CGL_EXT_framebuffer_blit%2CGL_EXT_framebuffer_multisample%2CGL_EXT_framebuffer_object%2CGL_OVR_multiview%2CGL_OVR_multiview2&generator=c&options=LOADER
+ *    http://glad.sh/#api=gl%3Acompatibility%3D3.3%2Cgles2%3D3.2&extensions=GL_ARB_debug_output%2CGL_ARB_framebuffer_object%2CGL_ARB_get_program_binary%2CGL_EXT_framebuffer_blit%2CGL_EXT_framebuffer_multisample%2CGL_EXT_framebuffer_object%2CGL_OVR_multiview%2CGL_OVR_multiview2&generator=c&options=MERGE%2CLOADER
  *
  */
 
@@ -49,11 +50,24 @@
   #error OpenGL (gl3ext.h) header already included (API: gl), remove previous include!
 #endif
 #define __gl3ext_h_ 1
+#ifdef __gl2_h_ /* Refuse to compile if a macro named like a GLES 2 header guard is already defined... */
+  #error OpenGL ES 2 header already included (API: gles2), remove previous include!
+#endif
+#define __gl2_h_ 1 /* ...then define it ourselves, presumably so a later include of that header becomes a no-op. */
+#ifdef __gles2_gl2_h_ /* Same check-then-define pattern for a second GLES 2 guard name. */
+  #error OpenGL ES 2 header already included (API: gles2), remove previous include!
+#endif
+#define __gles2_gl2_h_ 1
+#ifdef __gles2_gl3_h_ /* Same pattern for the GLES 3 guard name. */
+  #error OpenGL ES 3 header already included (API: gles2), remove previous include!
+#endif
+#define __gles2_gl3_h_ 1
 #ifdef __clang__
 #pragma clang diagnostic pop
 #endif
 
 #define GLAD_GL
+#define GLAD_GLES2 /* Defined next to GLAD_GL: this header was generated with both the gl and gles2 APIs merged. */
 #define GLAD_OPTION_GL_LOADER
 
 #ifdef __cplusplus
@@ -1514,6 +1528,352 @@ typedef void (*GLADpostcallback)(void *ret, const char *name, GLADapiproc apipro
 #define GL_ZERO 0
 #define GL_ZOOM_X 0x0D16
 #define GL_ZOOM_Y 0x0D17
+#define GL_ACTIVE_ATOMIC_COUNTER_BUFFERS 0x92D9 /* A second alphabetical run of constants starts here, after the list that ended at GL_ZOOM_Y. */
+#define GL_ACTIVE_PROGRAM 0x8259
+#define GL_ACTIVE_RESOURCES 0x92F5
+#define GL_ACTIVE_VARIABLES 0x9305
+#define GL_ALL_BARRIER_BITS 0xFFFFFFFF /* Same value as GL_ALL_SHADER_BITS on the next line. */
+#define GL_ALL_SHADER_BITS 0xFFFFFFFF
+#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A
+#define GL_ARRAY_SIZE 0x92FB
+#define GL_ARRAY_STRIDE 0x92FE
+#define GL_ATOMIC_COUNTER_BARRIER_BIT 0x00001000
+#define GL_ATOMIC_COUNTER_BUFFER 0x92C0
+#define GL_ATOMIC_COUNTER_BUFFER_BINDING 0x92C1
+#define GL_ATOMIC_COUNTER_BUFFER_INDEX 0x9301
+#define GL_ATOMIC_COUNTER_BUFFER_SIZE 0x92C3
+#define GL_ATOMIC_COUNTER_BUFFER_START 0x92C2
+#define GL_BLOCK_INDEX 0x92FD
+#define GL_BUFFER 0x82E0
+#define GL_BUFFER_BINDING 0x9302
+#define GL_BUFFER_DATA_SIZE 0x9303
+#define GL_BUFFER_UPDATE_BARRIER_BIT 0x00000200
+#define GL_BUFFER_VARIABLE 0x92E5
+#define GL_COLORBURN 0x929A
+#define GL_COLORDODGE 0x9299
+#define GL_COMMAND_BARRIER_BIT 0x00000040
+#define GL_COMPRESSED_R11_EAC 0x9270
+#define GL_COMPRESSED_RG11_EAC 0x9272
+#define GL_COMPRESSED_RGB8_ETC2 0x9274
+#define GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9276
+#define GL_COMPRESSED_RGBA8_ETC2_EAC 0x9278
+#define GL_COMPRESSED_RGBA_ASTC_10x10 0x93BB /* ASTC names sort as strings, so 10x10 comes before 4x4; the values 0x93B0..0x93BD are all present. */
+#define GL_COMPRESSED_RGBA_ASTC_10x5 0x93B8
+#define GL_COMPRESSED_RGBA_ASTC_10x6 0x93B9
+#define GL_COMPRESSED_RGBA_ASTC_10x8 0x93BA
+#define GL_COMPRESSED_RGBA_ASTC_12x10 0x93BC
+#define GL_COMPRESSED_RGBA_ASTC_12x12 0x93BD
+#define GL_COMPRESSED_RGBA_ASTC_4x4 0x93B0
+#define GL_COMPRESSED_RGBA_ASTC_5x4 0x93B1
+#define GL_COMPRESSED_RGBA_ASTC_5x5 0x93B2
+#define GL_COMPRESSED_RGBA_ASTC_6x5 0x93B3
+#define GL_COMPRESSED_RGBA_ASTC_6x6 0x93B4
+#define GL_COMPRESSED_RGBA_ASTC_8x5 0x93B5
+#define GL_COMPRESSED_RGBA_ASTC_8x6 0x93B6
+#define GL_COMPRESSED_RGBA_ASTC_8x8 0x93B7
+#define GL_COMPRESSED_SIGNED_R11_EAC 0x9271
+#define GL_COMPRESSED_SIGNED_RG11_EAC 0x9273
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10 0x93DB /* Each SRGB8_ALPHA8 ASTC value is the matching RGBA ASTC value plus 0x20. */
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5 0x93D8
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6 0x93D9
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8 0x93DA
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10 0x93DC
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12 0x93DD
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4 0x93D0
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4 0x93D1
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5 0x93D2
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5 0x93D3
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6 0x93D4
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5 0x93D5
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6 0x93D6
+#define GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8 0x93D7
+#define GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC 0x9279
+#define GL_COMPRESSED_SRGB8_ETC2 0x9275
+#define GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9277
+#define GL_COMPUTE_SHADER 0x91B9
+#define GL_COMPUTE_SHADER_BIT 0x00000020
+#define GL_COMPUTE_WORK_GROUP_SIZE 0x8267
+#define GL_CONTEXT_FLAG_DEBUG_BIT 0x00000002 /* NOTE(review): *_BIT values repeat across names (this one, GL_ELEMENT_ARRAY_BARRIER_BIT and GL_FRAGMENT_SHADER_BIT are all 0x00000002), so they presumably belong to different bitfields; do not mix them. */
+#define GL_CONTEXT_FLAG_ROBUST_ACCESS_BIT 0x00000004
+#define GL_CONTEXT_LOST 0x0507
+#define GL_COPY_READ_BUFFER_BINDING 0x8F36
+#define GL_COPY_WRITE_BUFFER_BINDING 0x8F37
+#define GL_DARKEN 0x9297
+#define GL_DEBUG_CALLBACK_FUNCTION 0x8244
+#define GL_DEBUG_CALLBACK_USER_PARAM 0x8245
+#define GL_DEBUG_GROUP_STACK_DEPTH 0x826D
+#define GL_DEBUG_LOGGED_MESSAGES 0x9145
+#define GL_DEBUG_NEXT_LOGGED_MESSAGE_LENGTH 0x8243
+#define GL_DEBUG_OUTPUT 0x92E0
+#define GL_DEBUG_OUTPUT_SYNCHRONOUS 0x8242
+#define GL_DEBUG_SEVERITY_HIGH 0x9146
+#define GL_DEBUG_SEVERITY_LOW 0x9148
+#define GL_DEBUG_SEVERITY_MEDIUM 0x9147
+#define GL_DEBUG_SEVERITY_NOTIFICATION 0x826B
+#define GL_DEBUG_SOURCE_API 0x8246
+#define GL_DEBUG_SOURCE_APPLICATION 0x824A
+#define GL_DEBUG_SOURCE_OTHER 0x824B
+#define GL_DEBUG_SOURCE_SHADER_COMPILER 0x8248
+#define GL_DEBUG_SOURCE_THIRD_PARTY 0x8249
+#define GL_DEBUG_SOURCE_WINDOW_SYSTEM 0x8247
+#define GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR 0x824D
+#define GL_DEBUG_TYPE_ERROR 0x824C
+#define GL_DEBUG_TYPE_MARKER 0x8268
+#define GL_DEBUG_TYPE_OTHER 0x8251
+#define GL_DEBUG_TYPE_PERFORMANCE 0x8250
+#define GL_DEBUG_TYPE_POP_GROUP 0x826A
+#define GL_DEBUG_TYPE_PORTABILITY 0x824F
+#define GL_DEBUG_TYPE_PUSH_GROUP 0x8269
+#define GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR 0x824E
+#define GL_DEPTH_STENCIL_TEXTURE_MODE 0x90EA
+#define GL_DIFFERENCE 0x929E
+#define GL_DISPATCH_INDIRECT_BUFFER 0x90EE
+#define GL_DISPATCH_INDIRECT_BUFFER_BINDING 0x90EF
+#define GL_DRAW_INDIRECT_BUFFER 0x8F3F
+#define GL_DRAW_INDIRECT_BUFFER_BINDING 0x8F43
+#define GL_ELEMENT_ARRAY_BARRIER_BIT 0x00000002
+#define GL_EXCLUSION 0x92A0
+#define GL_FIXED 0x140C
+#define GL_FRACTIONAL_EVEN 0x8E7C
+#define GL_FRACTIONAL_ODD 0x8E7B
+#define GL_FRAGMENT_INTERPOLATION_OFFSET_BITS 0x8E5D
+#define GL_FRAGMENT_SHADER_BIT 0x00000002
+#define GL_FRAMEBUFFER_BARRIER_BIT 0x00000400
+#define GL_FRAMEBUFFER_DEFAULT_FIXED_SAMPLE_LOCATIONS 0x9314
+#define GL_FRAMEBUFFER_DEFAULT_HEIGHT 0x9311
+#define GL_FRAMEBUFFER_DEFAULT_LAYERS 0x9312
+#define GL_FRAMEBUFFER_DEFAULT_SAMPLES 0x9313
+#define GL_FRAMEBUFFER_DEFAULT_WIDTH 0x9310
+#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS 0x8CD9
+#define GL_GEOMETRY_SHADER_BIT 0x00000004
+#define GL_GEOMETRY_SHADER_INVOCATIONS 0x887F
+#define GL_GUILTY_CONTEXT_RESET 0x8253
+#define GL_HARDLIGHT 0x929B
+#define GL_HIGH_FLOAT 0x8DF2
+#define GL_HIGH_INT 0x8DF5
+#define GL_HSL_COLOR 0x92AF
+#define GL_HSL_HUE 0x92AD
+#define GL_HSL_LUMINOSITY 0x92B0
+#define GL_HSL_SATURATION 0x92AE
+#define GL_IMAGE_2D 0x904D
+#define GL_IMAGE_2D_ARRAY 0x9053
+#define GL_IMAGE_3D 0x904E
+#define GL_IMAGE_BINDING_ACCESS 0x8F3E
+#define GL_IMAGE_BINDING_FORMAT 0x906E
+#define GL_IMAGE_BINDING_LAYER 0x8F3D
+#define GL_IMAGE_BINDING_LAYERED 0x8F3C
+#define GL_IMAGE_BINDING_LEVEL 0x8F3B
+#define GL_IMAGE_BINDING_NAME 0x8F3A
+#define GL_IMAGE_BUFFER 0x9051
+#define GL_IMAGE_CUBE 0x9050
+#define GL_IMAGE_CUBE_MAP_ARRAY 0x9054
+#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_CLASS 0x90C9
+#define GL_IMAGE_FORMAT_COMPATIBILITY_BY_SIZE 0x90C8
+#define GL_IMAGE_FORMAT_COMPATIBILITY_TYPE 0x90C7
+#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B
+#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A
+#define GL_INNOCENT_CONTEXT_RESET 0x8254
+#define GL_INT_IMAGE_2D 0x9058
+#define GL_INT_IMAGE_2D_ARRAY 0x905E
+#define GL_INT_IMAGE_3D 0x9059
+#define GL_INT_IMAGE_BUFFER 0x905C
+#define GL_INT_IMAGE_CUBE 0x905B
+#define GL_INT_IMAGE_CUBE_MAP_ARRAY 0x905F
+#define GL_INT_SAMPLER_CUBE_MAP_ARRAY 0x900E
+#define GL_ISOLINES 0x8E7A
+#define GL_IS_PER_PATCH 0x92E7
+#define GL_IS_ROW_MAJOR 0x9300
+#define GL_LAYER_PROVOKING_VERTEX 0x825E
+#define GL_LIGHTEN 0x9298
+#define GL_LOCATION 0x930E
+#define GL_LOSE_CONTEXT_ON_RESET 0x8252
+#define GL_LOW_FLOAT 0x8DF0 /* LOW/MEDIUM/HIGH_FLOAT are 0x8DF0..0x8DF2 and LOW/MEDIUM/HIGH_INT are 0x8DF3..0x8DF5. */
+#define GL_LOW_INT 0x8DF3
+#define GL_MATRIX_STRIDE 0x92FF
+#define GL_MAX_ATOMIC_COUNTER_BUFFER_BINDINGS 0x92DC
+#define GL_MAX_ATOMIC_COUNTER_BUFFER_SIZE 0x92D8
+#define GL_MAX_COMBINED_ATOMIC_COUNTERS 0x92D7
+#define GL_MAX_COMBINED_ATOMIC_COUNTER_BUFFERS 0x92D1
+#define GL_MAX_COMBINED_COMPUTE_UNIFORM_COMPONENTS 0x8266
+#define GL_MAX_COMBINED_IMAGE_UNIFORMS 0x90CF
+#define GL_MAX_COMBINED_IMAGE_UNITS_AND_FRAGMENT_OUTPUTS 0x8F39 /* Two names, one value: identical to GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES below. */
+#define GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES 0x8F39
+#define GL_MAX_COMBINED_SHADER_STORAGE_BLOCKS 0x90DC
+#define GL_MAX_COMBINED_TESS_CONTROL_UNIFORM_COMPONENTS 0x8E1E
+#define GL_MAX_COMBINED_TESS_EVALUATION_UNIFORM_COMPONENTS 0x8E1F
+#define GL_MAX_COMPUTE_ATOMIC_COUNTERS 0x8265
+#define GL_MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS 0x8264
+#define GL_MAX_COMPUTE_IMAGE_UNIFORMS 0x91BD
+#define GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS 0x90DB
+#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
+#define GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS 0x91BC
+#define GL_MAX_COMPUTE_UNIFORM_BLOCKS 0x91BB
+#define GL_MAX_COMPUTE_UNIFORM_COMPONENTS 0x8263
+#define GL_MAX_COMPUTE_WORK_GROUP_COUNT 0x91BE
+#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
+#define GL_MAX_COMPUTE_WORK_GROUP_SIZE 0x91BF
+#define GL_MAX_DEBUG_GROUP_STACK_DEPTH 0x826C
+#define GL_MAX_DEBUG_LOGGED_MESSAGES 0x9144
+#define GL_MAX_DEBUG_MESSAGE_LENGTH 0x9143
+#define GL_MAX_ELEMENT_INDEX 0x8D6B
+#define GL_MAX_FRAGMENT_ATOMIC_COUNTERS 0x92D6
+#define GL_MAX_FRAGMENT_ATOMIC_COUNTER_BUFFERS 0x92D0
+#define GL_MAX_FRAGMENT_IMAGE_UNIFORMS 0x90CE
+#define GL_MAX_FRAGMENT_INTERPOLATION_OFFSET 0x8E5C
+#define GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS 0x90DA
+#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD
+#define GL_MAX_FRAMEBUFFER_HEIGHT 0x9316
+#define GL_MAX_FRAMEBUFFER_LAYERS 0x9317
+#define GL_MAX_FRAMEBUFFER_SAMPLES 0x9318
+#define GL_MAX_FRAMEBUFFER_WIDTH 0x9315
+#define GL_MAX_GEOMETRY_ATOMIC_COUNTERS 0x92D5
+#define GL_MAX_GEOMETRY_ATOMIC_COUNTER_BUFFERS 0x92CF
+#define GL_MAX_GEOMETRY_IMAGE_UNIFORMS 0x90CD
+#define GL_MAX_GEOMETRY_SHADER_INVOCATIONS 0x8E5A
+#define GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS 0x90D7
+#define GL_MAX_IMAGE_UNITS 0x8F38
+#define GL_MAX_LABEL_LENGTH 0x82E8
+#define GL_MAX_NAME_LENGTH 0x92F6
+#define GL_MAX_NUM_ACTIVE_VARIABLES 0x92F7
+#define GL_MAX_PATCH_VERTICES 0x8E7D
+#define GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET 0x8E5F
+#define GL_MAX_SHADER_STORAGE_BLOCK_SIZE 0x90DE
+#define GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS 0x90DD
+#define GL_MAX_TESS_CONTROL_ATOMIC_COUNTERS 0x92D3
+#define GL_MAX_TESS_CONTROL_ATOMIC_COUNTER_BUFFERS 0x92CD
+#define GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS 0x90CB
+#define GL_MAX_TESS_CONTROL_INPUT_COMPONENTS 0x886C
+#define GL_MAX_TESS_CONTROL_OUTPUT_COMPONENTS 0x8E83
+#define GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS 0x90D8
+#define GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS 0x8E81
+#define GL_MAX_TESS_CONTROL_TOTAL_OUTPUT_COMPONENTS 0x8E85
+#define GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS 0x8E89
+#define GL_MAX_TESS_CONTROL_UNIFORM_COMPONENTS 0x8E7F
+#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTERS 0x92D4
+#define GL_MAX_TESS_EVALUATION_ATOMIC_COUNTER_BUFFERS 0x92CE
+#define GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS 0x90CC
+#define GL_MAX_TESS_EVALUATION_INPUT_COMPONENTS 0x886D
+#define GL_MAX_TESS_EVALUATION_OUTPUT_COMPONENTS 0x8E86
+#define GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS 0x90D9
+#define GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS 0x8E82
+#define GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS 0x8E8A
+#define GL_MAX_TESS_EVALUATION_UNIFORM_COMPONENTS 0x8E80
+#define GL_MAX_TESS_GEN_LEVEL 0x8E7E
+#define GL_MAX_TESS_PATCH_COMPONENTS 0x8E84
+#define GL_MAX_UNIFORM_LOCATIONS 0x826E
+#define GL_MAX_VARYING_VECTORS 0x8DFC
+#define GL_MAX_VERTEX_ATOMIC_COUNTERS 0x92D2
+#define GL_MAX_VERTEX_ATOMIC_COUNTER_BUFFERS 0x92CC
+#define GL_MAX_VERTEX_ATTRIB_BINDINGS 0x82DA
+#define GL_MAX_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D9
+#define GL_MAX_VERTEX_ATTRIB_STRIDE 0x82E5
+#define GL_MAX_VERTEX_IMAGE_UNIFORMS 0x90CA
+#define GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS 0x90D6
+#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB
+#define GL_MEDIUM_FLOAT 0x8DF1
+#define GL_MEDIUM_INT 0x8DF4
+#define GL_MIN_FRAGMENT_INTERPOLATION_OFFSET 0x8E5B /* One below GL_MAX_FRAGMENT_INTERPOLATION_OFFSET (0x8E5C). */
+#define GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET 0x8E5E /* One below GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET (0x8E5F). */
+#define GL_MIN_SAMPLE_SHADING_VALUE 0x8C37
+#define GL_MULTIPLY 0x9294
+#define GL_MULTISAMPLE_LINE_WIDTH_GRANULARITY 0x9382
+#define GL_MULTISAMPLE_LINE_WIDTH_RANGE 0x9381
+#define GL_NAME_LENGTH 0x92F9
+#define GL_NO_RESET_NOTIFICATION 0x8261
+#define GL_NUM_ACTIVE_VARIABLES 0x9304
+#define GL_NUM_SAMPLE_COUNTS 0x9380
+#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9
+#define GL_OFFSET 0x92FC
+#define GL_OVERLAY 0x9296
+#define GL_PATCHES 0x000E
+#define GL_PATCH_VERTICES 0x8E72
+#define GL_PIXEL_BUFFER_BARRIER_BIT 0x00000080
+#define GL_PRIMITIVE_BOUNDING_BOX 0x92BE
+#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69
+#define GL_PRIMITIVE_RESTART_FOR_PATCHES_SUPPORTED 0x8221
+#define GL_PROGRAM 0x82E2
+#define GL_PROGRAM_INPUT 0x92E3
+#define GL_PROGRAM_OUTPUT 0x92E4
+#define GL_PROGRAM_PIPELINE 0x82E4
+#define GL_PROGRAM_PIPELINE_BINDING 0x825A
+#define GL_PROGRAM_SEPARABLE 0x8258
+#define GL_QUERY 0x82E3
+#define GL_REFERENCED_BY_COMPUTE_SHADER 0x930B
+#define GL_REFERENCED_BY_FRAGMENT_SHADER 0x930A
+#define GL_REFERENCED_BY_GEOMETRY_SHADER 0x9309
+#define GL_REFERENCED_BY_TESS_CONTROL_SHADER 0x9307
+#define GL_REFERENCED_BY_TESS_EVALUATION_SHADER 0x9308
+#define GL_REFERENCED_BY_VERTEX_SHADER 0x9306
+#define GL_RESET_NOTIFICATION_STRATEGY 0x8256
+#define GL_RGB565 0x8D62
+#define GL_SAMPLER 0x82E6
+#define GL_SAMPLER_CUBE_MAP_ARRAY 0x900C
+#define GL_SAMPLER_CUBE_MAP_ARRAY_SHADOW 0x900D
+#define GL_SAMPLE_SHADING 0x8C36
+#define GL_SCREEN 0x9295
+#define GL_SHADER 0x82E1
+#define GL_SHADER_BINARY_FORMATS 0x8DF8
+#define GL_SHADER_COMPILER 0x8DFA
+#define GL_SHADER_IMAGE_ACCESS_BARRIER_BIT 0x00000020
+#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000
+#define GL_SHADER_STORAGE_BLOCK 0x92E6
+#define GL_SHADER_STORAGE_BUFFER 0x90D2
+#define GL_SHADER_STORAGE_BUFFER_BINDING 0x90D3
+#define GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT 0x90DF
+#define GL_SHADER_STORAGE_BUFFER_SIZE 0x90D5
+#define GL_SHADER_STORAGE_BUFFER_START 0x90D4
+#define GL_SOFTLIGHT 0x929C
+#define GL_TESS_CONTROL_OUTPUT_VERTICES 0x8E75
+#define GL_TESS_CONTROL_SHADER 0x8E88
+#define GL_TESS_CONTROL_SHADER_BIT 0x00000008
+#define GL_TESS_EVALUATION_SHADER 0x8E87
+#define GL_TESS_EVALUATION_SHADER_BIT 0x00000010
+#define GL_TESS_GEN_MODE 0x8E76
+#define GL_TESS_GEN_POINT_MODE 0x8E79
+#define GL_TESS_GEN_SPACING 0x8E77
+#define GL_TESS_GEN_VERTEX_ORDER 0x8E78
+#define GL_TEXTURE_BINDING_CUBE_MAP_ARRAY 0x900A
+#define GL_TEXTURE_BUFFER_BINDING 0x8C2A
+#define GL_TEXTURE_BUFFER_OFFSET 0x919D
+#define GL_TEXTURE_BUFFER_OFFSET_ALIGNMENT 0x919F
+#define GL_TEXTURE_BUFFER_SIZE 0x919E
+#define GL_TEXTURE_CUBE_MAP_ARRAY 0x9009
+#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008
+#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F
+#define GL_TEXTURE_IMMUTABLE_LEVELS 0x82DF
+#define GL_TEXTURE_UPDATE_BARRIER_BIT 0x00000100
+#define GL_TOP_LEVEL_ARRAY_SIZE 0x930C
+#define GL_TOP_LEVEL_ARRAY_STRIDE 0x930D
+#define GL_TRANSFORM_FEEDBACK 0x8E22
+#define GL_TRANSFORM_FEEDBACK_ACTIVE 0x8E24 /* Same value as GL_TRANSFORM_FEEDBACK_BUFFER_ACTIVE below. */
+#define GL_TRANSFORM_FEEDBACK_BARRIER_BIT 0x00000800
+#define GL_TRANSFORM_FEEDBACK_BINDING 0x8E25
+#define GL_TRANSFORM_FEEDBACK_BUFFER_ACTIVE 0x8E24
+#define GL_TRANSFORM_FEEDBACK_BUFFER_PAUSED 0x8E23 /* Same value as GL_TRANSFORM_FEEDBACK_PAUSED on the next line. */
+#define GL_TRANSFORM_FEEDBACK_PAUSED 0x8E23
+#define GL_TRANSFORM_FEEDBACK_VARYING 0x92F4
+#define GL_TYPE 0x92FA
+#define GL_UNDEFINED_VERTEX 0x8260
+#define GL_UNIFORM 0x92E1
+#define GL_UNIFORM_BARRIER_BIT 0x00000004
+#define GL_UNIFORM_BLOCK 0x92E2
+#define GL_UNKNOWN_CONTEXT_RESET 0x8255
+#define GL_UNSIGNED_INT_ATOMIC_COUNTER 0x92DB
+#define GL_UNSIGNED_INT_IMAGE_2D 0x9063
+#define GL_UNSIGNED_INT_IMAGE_2D_ARRAY 0x9069
+#define GL_UNSIGNED_INT_IMAGE_3D 0x9064
+#define GL_UNSIGNED_INT_IMAGE_BUFFER 0x9067
+#define GL_UNSIGNED_INT_IMAGE_CUBE 0x9066
+#define GL_UNSIGNED_INT_IMAGE_CUBE_MAP_ARRAY 0x906A
+#define GL_UNSIGNED_INT_SAMPLER_CUBE_MAP_ARRAY 0x900F
+#define GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT 0x00000001
+#define GL_VERTEX_ATTRIB_BINDING 0x82D4
+#define GL_VERTEX_ATTRIB_RELATIVE_OFFSET 0x82D5
+#define GL_VERTEX_BINDING_BUFFER 0x8F4F
+#define GL_VERTEX_BINDING_DIVISOR 0x82D6
+#define GL_VERTEX_BINDING_OFFSET 0x82D7
+#define GL_VERTEX_BINDING_STRIDE 0x82D8
+#define GL_VERTEX_SHADER_BIT 0x00000001 /* Last constant of this alphabetical run. */
 
 
 #include <KHR/khrplatform.h>
@@ -1605,6 +1965,14 @@ GLAD_API_CALL int GLAD_GL_VERSION_3_1;
 GLAD_API_CALL int GLAD_GL_VERSION_3_2;
 #define GL_VERSION_3_3 1
 GLAD_API_CALL int GLAD_GL_VERSION_3_3;
+#define GL_ES_VERSION_2_0 1 /* GLES versions follow the same pattern as GL_VERSION_3_3 above: a macro fixed at 1 plus a GLAD_* int variable declaration. */
+GLAD_API_CALL int GLAD_GL_ES_VERSION_2_0;
+#define GL_ES_VERSION_3_0 1
+GLAD_API_CALL int GLAD_GL_ES_VERSION_3_0;
+#define GL_ES_VERSION_3_1 1
+GLAD_API_CALL int GLAD_GL_ES_VERSION_3_1;
+#define GL_ES_VERSION_3_2 1
+GLAD_API_CALL int GLAD_GL_ES_VERSION_3_2;
 #define GL_ARB_debug_output 1
 GLAD_API_CALL int GLAD_GL_ARB_debug_output;
 #define GL_ARB_framebuffer_object 1
@@ -2374,6 +2742,111 @@ typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3IPROC)(GLint x, GLint y, GLint z);
 typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3IVPROC)(const GLint * v);
 typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3SPROC)(GLshort x, GLshort y, GLshort z);
 typedef void (GLAD_API_PTR *PFNGLWINDOWPOS3SVPROC)(const GLshort * v);
+typedef void (GLAD_API_PTR *PFNGLACTIVESHADERPROGRAMPROC)(GLuint pipeline, GLuint program);
+typedef void (GLAD_API_PTR *PFNGLBINDIMAGETEXTUREPROC)(GLuint unit, GLuint texture, GLint level, GLboolean layered, GLint layer, GLenum access, GLenum format);
+typedef void (GLAD_API_PTR *PFNGLBINDPROGRAMPIPELINEPROC)(GLuint pipeline);
+typedef void (GLAD_API_PTR *PFNGLBINDTRANSFORMFEEDBACKPROC)(GLenum target, GLuint id);
+typedef void (GLAD_API_PTR *PFNGLBINDVERTEXBUFFERPROC)(GLuint bindingindex, GLuint buffer, GLintptr offset, GLsizei stride);
+typedef void (GLAD_API_PTR *PFNGLBLENDBARRIERPROC)(void);
+typedef void (GLAD_API_PTR *PFNGLBLENDEQUATIONSEPARATEIPROC)(GLuint buf, GLenum modeRGB, GLenum modeAlpha);
+typedef void (GLAD_API_PTR *PFNGLBLENDEQUATIONIPROC)(GLuint buf, GLenum mode);
+typedef void (GLAD_API_PTR *PFNGLBLENDFUNCSEPARATEIPROC)(GLuint buf, GLenum srcRGB, GLenum dstRGB, GLenum srcAlpha, GLenum dstAlpha);
+typedef void (GLAD_API_PTR *PFNGLBLENDFUNCIPROC)(GLuint buf, GLenum src, GLenum dst);
+typedef void (GLAD_API_PTR *PFNGLCLEARDEPTHFPROC)(GLfloat d);
+typedef void (GLAD_API_PTR *PFNGLCOPYIMAGESUBDATAPROC)(GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei srcWidth, GLsizei srcHeight, GLsizei srcDepth);
+typedef GLuint (GLAD_API_PTR *PFNGLCREATESHADERPROGRAMVPROC)(GLenum type, GLsizei count, const GLchar *const* strings);
+typedef void (GLAD_API_PTR *PFNGLDEBUGMESSAGECALLBACKPROC)(GLDEBUGPROC callback, const void * userParam);
+typedef void (GLAD_API_PTR *PFNGLDEBUGMESSAGECONTROLPROC)(GLenum source, GLenum type, GLenum severity, GLsizei count, const GLuint * ids, GLboolean enabled);
+typedef void (GLAD_API_PTR *PFNGLDEBUGMESSAGEINSERTPROC)(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar * buf);
+typedef void (GLAD_API_PTR *PFNGLDELETEPROGRAMPIPELINESPROC)(GLsizei n, const GLuint * pipelines);
+typedef void (GLAD_API_PTR *PFNGLDELETETRANSFORMFEEDBACKSPROC)(GLsizei n, const GLuint * ids);
+typedef void (GLAD_API_PTR *PFNGLDEPTHRANGEFPROC)(GLfloat n, GLfloat f);
+typedef void (GLAD_API_PTR *PFNGLDISPATCHCOMPUTEPROC)(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z);
+typedef void (GLAD_API_PTR *PFNGLDISPATCHCOMPUTEINDIRECTPROC)(GLintptr indirect);
+typedef void (GLAD_API_PTR *PFNGLDRAWARRAYSINDIRECTPROC)(GLenum mode, const void * indirect);
+typedef void (GLAD_API_PTR *PFNGLDRAWELEMENTSINDIRECTPROC)(GLenum mode, GLenum type, const void * indirect);
+typedef void (GLAD_API_PTR *PFNGLFRAMEBUFFERPARAMETERIPROC)(GLenum target, GLenum pname, GLint param);
+typedef void (GLAD_API_PTR *PFNGLGENPROGRAMPIPELINESPROC)(GLsizei n, GLuint * pipelines);
+typedef void (GLAD_API_PTR *PFNGLGENTRANSFORMFEEDBACKSPROC)(GLsizei n, GLuint * ids);
+typedef GLuint (GLAD_API_PTR *PFNGLGETDEBUGMESSAGELOGPROC)(GLuint count, GLsizei bufSize, GLenum * sources, GLenum * types, GLuint * ids, GLenum * severities, GLsizei * lengths, GLchar * messageLog);
+typedef void (GLAD_API_PTR *PFNGLGETFRAMEBUFFERPARAMETERIVPROC)(GLenum target, GLenum pname, GLint * params);
+typedef GLenum (GLAD_API_PTR *PFNGLGETGRAPHICSRESETSTATUSPROC)(void);
+typedef void (GLAD_API_PTR *PFNGLGETINTERNALFORMATIVPROC)(GLenum target, GLenum internalformat, GLenum pname, GLsizei count, GLint * params);
+typedef void (GLAD_API_PTR *PFNGLGETOBJECTLABELPROC)(GLenum identifier, GLuint name, GLsizei bufSize, GLsizei * length, GLchar * label);
+typedef void (GLAD_API_PTR *PFNGLGETOBJECTPTRLABELPROC)(const void * ptr, GLsizei bufSize, GLsizei * length, GLchar * label);
+typedef void (GLAD_API_PTR *PFNGLGETPROGRAMINTERFACEIVPROC)(GLuint program, GLenum programInterface, GLenum pname, GLint * params);
+typedef void (GLAD_API_PTR *PFNGLGETPROGRAMPIPELINEINFOLOGPROC)(GLuint pipeline, GLsizei bufSize, GLsizei * length, GLchar * infoLog);
+typedef void (GLAD_API_PTR *PFNGLGETPROGRAMPIPELINEIVPROC)(GLuint pipeline, GLenum pname, GLint * params);
+typedef GLuint (GLAD_API_PTR *PFNGLGETPROGRAMRESOURCEINDEXPROC)(GLuint program, GLenum programInterface, const GLchar * name);
+typedef GLint (GLAD_API_PTR *PFNGLGETPROGRAMRESOURCELOCATIONPROC)(GLuint program, GLenum programInterface, const GLchar * name);
+typedef void (GLAD_API_PTR *PFNGLGETPROGRAMRESOURCENAMEPROC)(GLuint program, GLenum programInterface, GLuint index, GLsizei bufSize, GLsizei * length, GLchar * name);
+typedef void (GLAD_API_PTR *PFNGLGETPROGRAMRESOURCEIVPROC)(GLuint program, GLenum programInterface, GLuint index, GLsizei propCount, const GLenum * props, GLsizei count, GLsizei * length, GLint * params);
+typedef void (GLAD_API_PTR *PFNGLGETSHADERPRECISIONFORMATPROC)(GLenum shadertype, GLenum precisiontype, GLint * range, GLint * precision);
+typedef void (GLAD_API_PTR *PFNGLGETNUNIFORMFVPROC)(GLuint program, GLint location, GLsizei bufSize, GLfloat * params);
+typedef void (GLAD_API_PTR *PFNGLGETNUNIFORMIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLint * params);
+typedef void (GLAD_API_PTR *PFNGLGETNUNIFORMUIVPROC)(GLuint program, GLint location, GLsizei bufSize, GLuint * params);
+typedef void (GLAD_API_PTR *PFNGLINVALIDATEFRAMEBUFFERPROC)(GLenum target, GLsizei numAttachments, const GLenum * attachments);
+typedef void (GLAD_API_PTR *PFNGLINVALIDATESUBFRAMEBUFFERPROC)(GLenum target, GLsizei numAttachments, const GLenum * attachments, GLint x, GLint y, GLsizei width, GLsizei height);
+typedef GLboolean (GLAD_API_PTR *PFNGLISPROGRAMPIPELINEPROC)(GLuint pipeline);
+typedef GLboolean (GLAD_API_PTR *PFNGLISTRANSFORMFEEDBACKPROC)(GLuint id);
+typedef void (GLAD_API_PTR *PFNGLMEMORYBARRIERPROC)(GLbitfield barriers);
+typedef void (GLAD_API_PTR *PFNGLMEMORYBARRIERBYREGIONPROC)(GLbitfield barriers);
+typedef void (GLAD_API_PTR *PFNGLMINSAMPLESHADINGPROC)(GLfloat value);
+typedef void (GLAD_API_PTR *PFNGLOBJECTLABELPROC)(GLenum identifier, GLuint name, GLsizei length, const GLchar * label);
+typedef void (GLAD_API_PTR *PFNGLOBJECTPTRLABELPROC)(const void * ptr, GLsizei length, const GLchar * label);
+typedef void (GLAD_API_PTR *PFNGLPATCHPARAMETERIPROC)(GLenum pname, GLint value);
+typedef void (GLAD_API_PTR *PFNGLPAUSETRANSFORMFEEDBACKPROC)(void);
+typedef void (GLAD_API_PTR *PFNGLPOPDEBUGGROUPPROC)(void);
+typedef void (GLAD_API_PTR *PFNGLPRIMITIVEBOUNDINGBOXPROC)(GLfloat minX, GLfloat minY, GLfloat minZ, GLfloat minW, GLfloat maxX, GLfloat maxY, GLfloat maxZ, GLfloat maxW);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM1FPROC)(GLuint program, GLint location, GLfloat v0);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM1FVPROC)(GLuint program, GLint location, GLsizei count, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM1IPROC)(GLuint program, GLint location, GLint v0);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM1IVPROC)(GLuint program, GLint location, GLsizei count, const GLint * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM1UIPROC)(GLuint program, GLint location, GLuint v0);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM1UIVPROC)(GLuint program, GLint location, GLsizei count, const GLuint * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM2FPROC)(GLuint program, GLint location, GLfloat v0, GLfloat v1);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM2FVPROC)(GLuint program, GLint location, GLsizei count, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM2IPROC)(GLuint program, GLint location, GLint v0, GLint v1);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM2IVPROC)(GLuint program, GLint location, GLsizei count, const GLint * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM2UIPROC)(GLuint program, GLint location, GLuint v0, GLuint v1);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM2UIVPROC)(GLuint program, GLint location, GLsizei count, const GLuint * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM3FPROC)(GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM3FVPROC)(GLuint program, GLint location, GLsizei count, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM3IPROC)(GLuint program, GLint location, GLint v0, GLint v1, GLint v2);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM3IVPROC)(GLuint program, GLint location, GLsizei count, const GLint * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM3UIPROC)(GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM3UIVPROC)(GLuint program, GLint location, GLsizei count, const GLuint * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM4FPROC)(GLuint program, GLint location, GLfloat v0, GLfloat v1, GLfloat v2, GLfloat v3);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM4FVPROC)(GLuint program, GLint location, GLsizei count, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM4IPROC)(GLuint program, GLint location, GLint v0, GLint v1, GLint v2, GLint v3);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM4IVPROC)(GLuint program, GLint location, GLsizei count, const GLint * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM4UIPROC)(GLuint program, GLint location, GLuint v0, GLuint v1, GLuint v2, GLuint v3);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORM4UIVPROC)(GLuint program, GLint location, GLsizei count, const GLuint * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORMMATRIX2FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORMMATRIX2X3FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORMMATRIX2X4FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORMMATRIX3FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORMMATRIX3X2FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORMMATRIX3X4FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORMMATRIX4FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORMMATRIX4X2FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPROGRAMUNIFORMMATRIX4X3FVPROC)(GLuint program, GLint location, GLsizei count, GLboolean transpose, const GLfloat * value);
+typedef void (GLAD_API_PTR *PFNGLPUSHDEBUGGROUPPROC)(GLenum source, GLuint id, GLsizei length, const GLchar * message);
+typedef void (GLAD_API_PTR *PFNGLREADNPIXELSPROC)(GLint x, GLint y, GLsizei width, GLsizei height, GLenum format, GLenum type, GLsizei bufSize, void * data);
+typedef void (GLAD_API_PTR *PFNGLRELEASESHADERCOMPILERPROC)(void);
+typedef void (GLAD_API_PTR *PFNGLRESUMETRANSFORMFEEDBACKPROC)(void);
+typedef void (GLAD_API_PTR *PFNGLSHADERBINARYPROC)(GLsizei count, const GLuint * shaders, GLenum binaryFormat, const void * binary, GLsizei length);
+typedef void (GLAD_API_PTR *PFNGLTEXBUFFERRANGEPROC)(GLenum target, GLenum internalformat, GLuint buffer, GLintptr offset, GLsizeiptr size);
+typedef void (GLAD_API_PTR *PFNGLTEXSTORAGE2DPROC)(GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height);
+typedef void (GLAD_API_PTR *PFNGLTEXSTORAGE2DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations);
+typedef void (GLAD_API_PTR *PFNGLTEXSTORAGE3DPROC)(GLenum target, GLsizei levels, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth);
+typedef void (GLAD_API_PTR *PFNGLTEXSTORAGE3DMULTISAMPLEPROC)(GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations);
+typedef void (GLAD_API_PTR *PFNGLUSEPROGRAMSTAGESPROC)(GLuint pipeline, GLbitfield stages, GLuint program);
+typedef void (GLAD_API_PTR *PFNGLVALIDATEPROGRAMPIPELINEPROC)(GLuint pipeline);
+typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBBINDINGPROC)(GLuint attribindex, GLuint bindingindex);
+typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBFORMATPROC)(GLuint attribindex, GLint size, GLenum type, GLboolean normalized, GLuint relativeoffset);
+typedef void (GLAD_API_PTR *PFNGLVERTEXATTRIBIFORMATPROC)(GLuint attribindex, GLint size, GLenum type, GLuint relativeoffset);
+typedef void (GLAD_API_PTR *PFNGLVERTEXBINDINGDIVISORPROC)(GLuint bindingindex, GLuint divisor);
 
 GLAD_API_CALL PFNGLACCUMPROC glad_glAccum;
 #define glAccum glad_glAccum
@@ -3877,6 +4350,216 @@ GLAD_API_CALL PFNGLWINDOWPOS3SPROC glad_glWindowPos3s;
 #define glWindowPos3s glad_glWindowPos3s
 GLAD_API_CALL PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv;
 #define glWindowPos3sv glad_glWindowPos3sv
+GLAD_API_CALL PFNGLACTIVESHADERPROGRAMPROC glad_glActiveShaderProgram;
+#define glActiveShaderProgram glad_glActiveShaderProgram
+GLAD_API_CALL PFNGLBINDIMAGETEXTUREPROC glad_glBindImageTexture;
+#define glBindImageTexture glad_glBindImageTexture
+GLAD_API_CALL PFNGLBINDPROGRAMPIPELINEPROC glad_glBindProgramPipeline;
+#define glBindProgramPipeline glad_glBindProgramPipeline
+GLAD_API_CALL PFNGLBINDTRANSFORMFEEDBACKPROC glad_glBindTransformFeedback;
+#define glBindTransformFeedback glad_glBindTransformFeedback
+GLAD_API_CALL PFNGLBINDVERTEXBUFFERPROC glad_glBindVertexBuffer;
+#define glBindVertexBuffer glad_glBindVertexBuffer
+GLAD_API_CALL PFNGLBLENDBARRIERPROC glad_glBlendBarrier;
+#define glBlendBarrier glad_glBlendBarrier
+GLAD_API_CALL PFNGLBLENDEQUATIONSEPARATEIPROC glad_glBlendEquationSeparatei;
+#define glBlendEquationSeparatei glad_glBlendEquationSeparatei
+GLAD_API_CALL PFNGLBLENDEQUATIONIPROC glad_glBlendEquationi;
+#define glBlendEquationi glad_glBlendEquationi
+GLAD_API_CALL PFNGLBLENDFUNCSEPARATEIPROC glad_glBlendFuncSeparatei;
+#define glBlendFuncSeparatei glad_glBlendFuncSeparatei
+GLAD_API_CALL PFNGLBLENDFUNCIPROC glad_glBlendFunci;
+#define glBlendFunci glad_glBlendFunci
+GLAD_API_CALL PFNGLCLEARDEPTHFPROC glad_glClearDepthf;
+#define glClearDepthf glad_glClearDepthf
+GLAD_API_CALL PFNGLCOPYIMAGESUBDATAPROC glad_glCopyImageSubData;
+#define glCopyImageSubData glad_glCopyImageSubData
+GLAD_API_CALL PFNGLCREATESHADERPROGRAMVPROC glad_glCreateShaderProgramv;
+#define glCreateShaderProgramv glad_glCreateShaderProgramv
+GLAD_API_CALL PFNGLDEBUGMESSAGECALLBACKPROC glad_glDebugMessageCallback;
+#define glDebugMessageCallback glad_glDebugMessageCallback
+GLAD_API_CALL PFNGLDEBUGMESSAGECONTROLPROC glad_glDebugMessageControl;
+#define glDebugMessageControl glad_glDebugMessageControl
+GLAD_API_CALL PFNGLDEBUGMESSAGEINSERTPROC glad_glDebugMessageInsert;
+#define glDebugMessageInsert glad_glDebugMessageInsert
+GLAD_API_CALL PFNGLDELETEPROGRAMPIPELINESPROC glad_glDeleteProgramPipelines;
+#define glDeleteProgramPipelines glad_glDeleteProgramPipelines
+GLAD_API_CALL PFNGLDELETETRANSFORMFEEDBACKSPROC glad_glDeleteTransformFeedbacks;
+#define glDeleteTransformFeedbacks glad_glDeleteTransformFeedbacks
+GLAD_API_CALL PFNGLDEPTHRANGEFPROC glad_glDepthRangef;
+#define glDepthRangef glad_glDepthRangef
+GLAD_API_CALL PFNGLDISPATCHCOMPUTEPROC glad_glDispatchCompute;
+#define glDispatchCompute glad_glDispatchCompute
+GLAD_API_CALL PFNGLDISPATCHCOMPUTEINDIRECTPROC glad_glDispatchComputeIndirect;
+#define glDispatchComputeIndirect glad_glDispatchComputeIndirect
+GLAD_API_CALL PFNGLDRAWARRAYSINDIRECTPROC glad_glDrawArraysIndirect;
+#define glDrawArraysIndirect glad_glDrawArraysIndirect
+GLAD_API_CALL PFNGLDRAWELEMENTSINDIRECTPROC glad_glDrawElementsIndirect;
+#define glDrawElementsIndirect glad_glDrawElementsIndirect
+GLAD_API_CALL PFNGLFRAMEBUFFERPARAMETERIPROC glad_glFramebufferParameteri;
+#define glFramebufferParameteri glad_glFramebufferParameteri
+GLAD_API_CALL PFNGLGENPROGRAMPIPELINESPROC glad_glGenProgramPipelines;
+#define glGenProgramPipelines glad_glGenProgramPipelines
+GLAD_API_CALL PFNGLGENTRANSFORMFEEDBACKSPROC glad_glGenTransformFeedbacks;
+#define glGenTransformFeedbacks glad_glGenTransformFeedbacks
+GLAD_API_CALL PFNGLGETDEBUGMESSAGELOGPROC glad_glGetDebugMessageLog;
+#define glGetDebugMessageLog glad_glGetDebugMessageLog
+GLAD_API_CALL PFNGLGETFRAMEBUFFERPARAMETERIVPROC glad_glGetFramebufferParameteriv;
+#define glGetFramebufferParameteriv glad_glGetFramebufferParameteriv
+GLAD_API_CALL PFNGLGETGRAPHICSRESETSTATUSPROC glad_glGetGraphicsResetStatus;
+#define glGetGraphicsResetStatus glad_glGetGraphicsResetStatus
+GLAD_API_CALL PFNGLGETINTERNALFORMATIVPROC glad_glGetInternalformativ;
+#define glGetInternalformativ glad_glGetInternalformativ
+GLAD_API_CALL PFNGLGETOBJECTLABELPROC glad_glGetObjectLabel;
+#define glGetObjectLabel glad_glGetObjectLabel
+GLAD_API_CALL PFNGLGETOBJECTPTRLABELPROC glad_glGetObjectPtrLabel;
+#define glGetObjectPtrLabel glad_glGetObjectPtrLabel
+GLAD_API_CALL PFNGLGETPROGRAMINTERFACEIVPROC glad_glGetProgramInterfaceiv;
+#define glGetProgramInterfaceiv glad_glGetProgramInterfaceiv
+GLAD_API_CALL PFNGLGETPROGRAMPIPELINEINFOLOGPROC glad_glGetProgramPipelineInfoLog;
+#define glGetProgramPipelineInfoLog glad_glGetProgramPipelineInfoLog
+GLAD_API_CALL PFNGLGETPROGRAMPIPELINEIVPROC glad_glGetProgramPipelineiv;
+#define glGetProgramPipelineiv glad_glGetProgramPipelineiv
+GLAD_API_CALL PFNGLGETPROGRAMRESOURCEINDEXPROC glad_glGetProgramResourceIndex;
+#define glGetProgramResourceIndex glad_glGetProgramResourceIndex
+GLAD_API_CALL PFNGLGETPROGRAMRESOURCELOCATIONPROC glad_glGetProgramResourceLocation;
+#define glGetProgramResourceLocation glad_glGetProgramResourceLocation
+GLAD_API_CALL PFNGLGETPROGRAMRESOURCENAMEPROC glad_glGetProgramResourceName;
+#define glGetProgramResourceName glad_glGetProgramResourceName
+GLAD_API_CALL PFNGLGETPROGRAMRESOURCEIVPROC glad_glGetProgramResourceiv;
+#define glGetProgramResourceiv glad_glGetProgramResourceiv
+GLAD_API_CALL PFNGLGETSHADERPRECISIONFORMATPROC glad_glGetShaderPrecisionFormat;
+#define glGetShaderPrecisionFormat glad_glGetShaderPrecisionFormat
+GLAD_API_CALL PFNGLGETNUNIFORMFVPROC glad_glGetnUniformfv;
+#define glGetnUniformfv glad_glGetnUniformfv
+GLAD_API_CALL PFNGLGETNUNIFORMIVPROC glad_glGetnUniformiv;
+#define glGetnUniformiv glad_glGetnUniformiv
+GLAD_API_CALL PFNGLGETNUNIFORMUIVPROC glad_glGetnUniformuiv;
+#define glGetnUniformuiv glad_glGetnUniformuiv
+GLAD_API_CALL PFNGLINVALIDATEFRAMEBUFFERPROC glad_glInvalidateFramebuffer;
+#define glInvalidateFramebuffer glad_glInvalidateFramebuffer
+GLAD_API_CALL PFNGLINVALIDATESUBFRAMEBUFFERPROC glad_glInvalidateSubFramebuffer;
+#define glInvalidateSubFramebuffer glad_glInvalidateSubFramebuffer
+GLAD_API_CALL PFNGLISPROGRAMPIPELINEPROC glad_glIsProgramPipeline;
+#define glIsProgramPipeline glad_glIsProgramPipeline
+GLAD_API_CALL PFNGLISTRANSFORMFEEDBACKPROC glad_glIsTransformFeedback;
+#define glIsTransformFeedback glad_glIsTransformFeedback
+GLAD_API_CALL PFNGLMEMORYBARRIERPROC glad_glMemoryBarrier;
+#define glMemoryBarrier glad_glMemoryBarrier
+GLAD_API_CALL PFNGLMEMORYBARRIERBYREGIONPROC glad_glMemoryBarrierByRegion;
+#define glMemoryBarrierByRegion glad_glMemoryBarrierByRegion
+GLAD_API_CALL PFNGLMINSAMPLESHADINGPROC glad_glMinSampleShading;
+#define glMinSampleShading glad_glMinSampleShading
+GLAD_API_CALL PFNGLOBJECTLABELPROC glad_glObjectLabel;
+#define glObjectLabel glad_glObjectLabel
+GLAD_API_CALL PFNGLOBJECTPTRLABELPROC glad_glObjectPtrLabel;
+#define glObjectPtrLabel glad_glObjectPtrLabel
+GLAD_API_CALL PFNGLPATCHPARAMETERIPROC glad_glPatchParameteri;
+#define glPatchParameteri glad_glPatchParameteri
+GLAD_API_CALL PFNGLPAUSETRANSFORMFEEDBACKPROC glad_glPauseTransformFeedback;
+#define glPauseTransformFeedback glad_glPauseTransformFeedback
+GLAD_API_CALL PFNGLPOPDEBUGGROUPPROC glad_glPopDebugGroup;
+#define glPopDebugGroup glad_glPopDebugGroup
+GLAD_API_CALL PFNGLPRIMITIVEBOUNDINGBOXPROC glad_glPrimitiveBoundingBox;
+#define glPrimitiveBoundingBox glad_glPrimitiveBoundingBox
+GLAD_API_CALL PFNGLPROGRAMUNIFORM1FPROC glad_glProgramUniform1f;
+#define glProgramUniform1f glad_glProgramUniform1f
+GLAD_API_CALL PFNGLPROGRAMUNIFORM1FVPROC glad_glProgramUniform1fv;
+#define glProgramUniform1fv glad_glProgramUniform1fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORM1IPROC glad_glProgramUniform1i;
+#define glProgramUniform1i glad_glProgramUniform1i
+GLAD_API_CALL PFNGLPROGRAMUNIFORM1IVPROC glad_glProgramUniform1iv;
+#define glProgramUniform1iv glad_glProgramUniform1iv
+GLAD_API_CALL PFNGLPROGRAMUNIFORM1UIPROC glad_glProgramUniform1ui;
+#define glProgramUniform1ui glad_glProgramUniform1ui
+GLAD_API_CALL PFNGLPROGRAMUNIFORM1UIVPROC glad_glProgramUniform1uiv;
+#define glProgramUniform1uiv glad_glProgramUniform1uiv
+GLAD_API_CALL PFNGLPROGRAMUNIFORM2FPROC glad_glProgramUniform2f;
+#define glProgramUniform2f glad_glProgramUniform2f
+GLAD_API_CALL PFNGLPROGRAMUNIFORM2FVPROC glad_glProgramUniform2fv;
+#define glProgramUniform2fv glad_glProgramUniform2fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORM2IPROC glad_glProgramUniform2i;
+#define glProgramUniform2i glad_glProgramUniform2i
+GLAD_API_CALL PFNGLPROGRAMUNIFORM2IVPROC glad_glProgramUniform2iv;
+#define glProgramUniform2iv glad_glProgramUniform2iv
+GLAD_API_CALL PFNGLPROGRAMUNIFORM2UIPROC glad_glProgramUniform2ui;
+#define glProgramUniform2ui glad_glProgramUniform2ui
+GLAD_API_CALL PFNGLPROGRAMUNIFORM2UIVPROC glad_glProgramUniform2uiv;
+#define glProgramUniform2uiv glad_glProgramUniform2uiv
+GLAD_API_CALL PFNGLPROGRAMUNIFORM3FPROC glad_glProgramUniform3f;
+#define glProgramUniform3f glad_glProgramUniform3f
+GLAD_API_CALL PFNGLPROGRAMUNIFORM3FVPROC glad_glProgramUniform3fv;
+#define glProgramUniform3fv glad_glProgramUniform3fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORM3IPROC glad_glProgramUniform3i;
+#define glProgramUniform3i glad_glProgramUniform3i
+GLAD_API_CALL PFNGLPROGRAMUNIFORM3IVPROC glad_glProgramUniform3iv;
+#define glProgramUniform3iv glad_glProgramUniform3iv
+GLAD_API_CALL PFNGLPROGRAMUNIFORM3UIPROC glad_glProgramUniform3ui;
+#define glProgramUniform3ui glad_glProgramUniform3ui
+GLAD_API_CALL PFNGLPROGRAMUNIFORM3UIVPROC glad_glProgramUniform3uiv;
+#define glProgramUniform3uiv glad_glProgramUniform3uiv
+GLAD_API_CALL PFNGLPROGRAMUNIFORM4FPROC glad_glProgramUniform4f;
+#define glProgramUniform4f glad_glProgramUniform4f
+GLAD_API_CALL PFNGLPROGRAMUNIFORM4FVPROC glad_glProgramUniform4fv;
+#define glProgramUniform4fv glad_glProgramUniform4fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORM4IPROC glad_glProgramUniform4i;
+#define glProgramUniform4i glad_glProgramUniform4i
+GLAD_API_CALL PFNGLPROGRAMUNIFORM4IVPROC glad_glProgramUniform4iv;
+#define glProgramUniform4iv glad_glProgramUniform4iv
+GLAD_API_CALL PFNGLPROGRAMUNIFORM4UIPROC glad_glProgramUniform4ui;
+#define glProgramUniform4ui glad_glProgramUniform4ui
+GLAD_API_CALL PFNGLPROGRAMUNIFORM4UIVPROC glad_glProgramUniform4uiv;
+#define glProgramUniform4uiv glad_glProgramUniform4uiv
+GLAD_API_CALL PFNGLPROGRAMUNIFORMMATRIX2FVPROC glad_glProgramUniformMatrix2fv;
+#define glProgramUniformMatrix2fv glad_glProgramUniformMatrix2fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORMMATRIX2X3FVPROC glad_glProgramUniformMatrix2x3fv;
+#define glProgramUniformMatrix2x3fv glad_glProgramUniformMatrix2x3fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORMMATRIX2X4FVPROC glad_glProgramUniformMatrix2x4fv;
+#define glProgramUniformMatrix2x4fv glad_glProgramUniformMatrix2x4fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORMMATRIX3FVPROC glad_glProgramUniformMatrix3fv;
+#define glProgramUniformMatrix3fv glad_glProgramUniformMatrix3fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORMMATRIX3X2FVPROC glad_glProgramUniformMatrix3x2fv;
+#define glProgramUniformMatrix3x2fv glad_glProgramUniformMatrix3x2fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORMMATRIX3X4FVPROC glad_glProgramUniformMatrix3x4fv;
+#define glProgramUniformMatrix3x4fv glad_glProgramUniformMatrix3x4fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORMMATRIX4FVPROC glad_glProgramUniformMatrix4fv;
+#define glProgramUniformMatrix4fv glad_glProgramUniformMatrix4fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORMMATRIX4X2FVPROC glad_glProgramUniformMatrix4x2fv;
+#define glProgramUniformMatrix4x2fv glad_glProgramUniformMatrix4x2fv
+GLAD_API_CALL PFNGLPROGRAMUNIFORMMATRIX4X3FVPROC glad_glProgramUniformMatrix4x3fv;
+#define glProgramUniformMatrix4x3fv glad_glProgramUniformMatrix4x3fv
+GLAD_API_CALL PFNGLPUSHDEBUGGROUPPROC glad_glPushDebugGroup;
+#define glPushDebugGroup glad_glPushDebugGroup
+GLAD_API_CALL PFNGLREADNPIXELSPROC glad_glReadnPixels;
+#define glReadnPixels glad_glReadnPixels
+GLAD_API_CALL PFNGLRELEASESHADERCOMPILERPROC glad_glReleaseShaderCompiler;
+#define glReleaseShaderCompiler glad_glReleaseShaderCompiler
+GLAD_API_CALL PFNGLRESUMETRANSFORMFEEDBACKPROC glad_glResumeTransformFeedback;
+#define glResumeTransformFeedback glad_glResumeTransformFeedback
+GLAD_API_CALL PFNGLSHADERBINARYPROC glad_glShaderBinary;
+#define glShaderBinary glad_glShaderBinary
+GLAD_API_CALL PFNGLTEXBUFFERRANGEPROC glad_glTexBufferRange;
+#define glTexBufferRange glad_glTexBufferRange
+GLAD_API_CALL PFNGLTEXSTORAGE2DPROC glad_glTexStorage2D;
+#define glTexStorage2D glad_glTexStorage2D
+GLAD_API_CALL PFNGLTEXSTORAGE2DMULTISAMPLEPROC glad_glTexStorage2DMultisample;
+#define glTexStorage2DMultisample glad_glTexStorage2DMultisample
+GLAD_API_CALL PFNGLTEXSTORAGE3DPROC glad_glTexStorage3D;
+#define glTexStorage3D glad_glTexStorage3D
+GLAD_API_CALL PFNGLTEXSTORAGE3DMULTISAMPLEPROC glad_glTexStorage3DMultisample;
+#define glTexStorage3DMultisample glad_glTexStorage3DMultisample
+GLAD_API_CALL PFNGLUSEPROGRAMSTAGESPROC glad_glUseProgramStages;
+#define glUseProgramStages glad_glUseProgramStages
+GLAD_API_CALL PFNGLVALIDATEPROGRAMPIPELINEPROC glad_glValidateProgramPipeline;
+#define glValidateProgramPipeline glad_glValidateProgramPipeline
+GLAD_API_CALL PFNGLVERTEXATTRIBBINDINGPROC glad_glVertexAttribBinding;
+#define glVertexAttribBinding glad_glVertexAttribBinding
+GLAD_API_CALL PFNGLVERTEXATTRIBFORMATPROC glad_glVertexAttribFormat;
+#define glVertexAttribFormat glad_glVertexAttribFormat
+GLAD_API_CALL PFNGLVERTEXATTRIBIFORMATPROC glad_glVertexAttribIFormat;
+#define glVertexAttribIFormat glad_glVertexAttribIFormat
+GLAD_API_CALL PFNGLVERTEXBINDINGDIVISORPROC glad_glVertexBindingDivisor;
+#define glVertexBindingDivisor glad_glVertexBindingDivisor
 
 
 
@@ -3885,6 +4568,9 @@ GLAD_API_CALL PFNGLWINDOWPOS3SVPROC glad_glWindowPos3sv;
 GLAD_API_CALL int gladLoadGLUserPtr( GLADuserptrloadfunc load, void *userptr);
 GLAD_API_CALL int gladLoadGL( GLADloadfunc load);
 
+GLAD_API_CALL int gladLoadGLES2UserPtr( GLADuserptrloadfunc load, void *userptr);
+GLAD_API_CALL int gladLoadGLES2( GLADloadfunc load);
+
 
 #ifdef GLAD_GL
 
@@ -3892,6 +4578,13 @@ GLAD_API_CALL int gladLoaderLoadGL(void);
 GLAD_API_CALL void gladLoaderUnloadGL(void);
 
 #endif
+#ifdef GLAD_GLES2
+
+GLAD_API_CALL int gladLoaderLoadGLES2(void);
+GLAD_API_CALL void gladLoaderUnloadGLES2(void);
+
+#endif /* GLAD_GLES2 */
+
 
 #ifdef __cplusplus
 }
diff --git a/thirdparty/glad/patches/patch_enable_both_gl_and_gles.diff b/thirdparty/glad/patches/patch_enable_both_gl_and_gles.diff
new file mode 100644
index 0000000000..a98efe51d8
--- /dev/null
+++ b/thirdparty/glad/patches/patch_enable_both_gl_and_gles.diff
@@ -0,0 +1,62 @@
+diff --git a/thirdparty/glad/gl.c b/thirdparty/glad/gl.c
+index a0b59dbbfb..9f10f6544a 100644
+--- a/thirdparty/glad/gl.c
++++ b/thirdparty/glad/gl.c
+@@ -2475,7 +2475,7 @@ static GLADapiproc glad_gl_get_proc(void *vuserptr, const char *name) {
+     return result;
+ }
+ 
+-static void* _glad_GL_loader_handle = NULL;
++static void* _glad_gles_loader_handle = NULL;
+ 
+ static void* glad_gl_dlopen_handle(void) {
+ #if GLAD_PLATFORM_APPLE
+@@ -2497,11 +2497,11 @@ static void* glad_gl_dlopen_handle(void) {
+     };
+ #endif
+ 
+-    if (_glad_GL_loader_handle == NULL) {
+-        _glad_GL_loader_handle = glad_get_dlopen_handle(NAMES, sizeof(NAMES) / sizeof(NAMES[0]));
++    if (_glad_gles_loader_handle == NULL) {
++        _glad_gles_loader_handle = glad_get_dlopen_handle(NAMES, sizeof(NAMES) / sizeof(NAMES[0]));
+     }
+ 
+-    return _glad_GL_loader_handle;
++    return _glad_gles_loader_handle;
+ }
+ 
+ static struct _glad_gl_userptr glad_gl_build_userptr(void *handle) {
+@@ -2527,7 +2527,7 @@ int gladLoaderLoadGL(void) {
+     int did_load = 0;
+     struct _glad_gl_userptr userptr;
+ 
+-    did_load = _glad_GL_loader_handle == NULL;
++    did_load = _glad_gles_loader_handle == NULL;
+     handle = glad_gl_dlopen_handle();
+     if (handle) {
+         userptr = glad_gl_build_userptr(handle);
+@@ -2545,9 +2545,9 @@ int gladLoaderLoadGL(void) {
+ 
+ 
+ void gladLoaderUnloadGL(void) {
+-    if (_glad_GL_loader_handle != NULL) {
+-        glad_close_dlopen_handle(_glad_GL_loader_handle);
+-        _glad_GL_loader_handle = NULL;
++    if (_glad_gles_loader_handle != NULL) {
++        glad_close_dlopen_handle(_glad_gles_loader_handle);
++        _glad_gles_loader_handle = NULL;
+     }
+ }
+ 
+diff --git a/thirdparty/glad/glad/gl.h b/thirdparty/glad/glad/gl.h
+index 905c16aeed..f3cb7d8cb5 100644
+--- a/thirdparty/glad/glad/gl.h
++++ b/thirdparty/glad/glad/gl.h
+@@ -67,6 +67,7 @@
+ #endif
+ 
+ #define GLAD_GL
++#define GLAD_GLES2
+ #define GLAD_OPTION_GL_LOADER
+ 
+ #ifdef __cplusplus