diff options
Diffstat (limited to 'thirdparty/spirv-cross/spirv_glsl.cpp')
-rw-r--r-- | thirdparty/spirv-cross/spirv_glsl.cpp | 19109 |
1 files changed, 19109 insertions, 0 deletions
diff --git a/thirdparty/spirv-cross/spirv_glsl.cpp b/thirdparty/spirv-cross/spirv_glsl.cpp new file mode 100644 index 0000000000..fad1132e82 --- /dev/null +++ b/thirdparty/spirv-cross/spirv_glsl.cpp @@ -0,0 +1,19109 @@ +/* + * Copyright 2015-2021 Arm Limited + * SPDX-License-Identifier: Apache-2.0 OR MIT + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * At your option, you may choose to accept this material under either: + * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or + * 2. The MIT License, found at <http://opensource.org/licenses/MIT>. + */ + +#include "spirv_glsl.hpp" +#include "GLSL.std.450.h" +#include "spirv_common.hpp" +#include <algorithm> +#include <assert.h> +#include <cmath> +#include <limits> +#include <locale.h> +#include <utility> +#include <array> + +#ifndef _WIN32 +#include <langinfo.h> +#endif +#include <locale.h> + +using namespace spv; +using namespace SPIRV_CROSS_NAMESPACE; +using namespace std; + +enum ExtraSubExpressionType +{ + // Create masks above any legal ID range to allow multiple address spaces into the extra_sub_expressions map. + EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET = 0x10000000, + EXTRA_SUB_EXPRESSION_TYPE_AUX = 0x20000000 +}; + +static bool is_unsigned_opcode(Op op) +{ + // Don't have to be exhaustive, only relevant for legacy target checking ... + switch (op) + { + case OpShiftRightLogical: + case OpUGreaterThan: + case OpUGreaterThanEqual: + case OpULessThan: + case OpULessThanEqual: + case OpUConvert: + case OpUDiv: + case OpUMod: + case OpUMulExtended: + case OpConvertUToF: + case OpConvertFToU: + return true; + + default: + return false; + } +} + +static bool is_unsigned_glsl_opcode(GLSLstd450 op) +{ + // Don't have to be exhaustive, only relevant for legacy target checking ... + switch (op) + { + case GLSLstd450UClamp: + case GLSLstd450UMin: + case GLSLstd450UMax: + case GLSLstd450FindUMsb: + return true; + + default: + return false; + } +} + +static bool packing_is_vec4_padded(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingHLSLCbuffer: + case BufferPackingHLSLCbufferPackOffset: + case BufferPackingStd140: + case BufferPackingStd140EnhancedLayout: + return true; + + default: + return false; + } +} + +static bool packing_is_hlsl(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingHLSLCbuffer: + case BufferPackingHLSLCbufferPackOffset: + return true; + + default: + return false; + } +} + +static bool packing_has_flexible_offset(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingStd140: + case BufferPackingStd430: + case BufferPackingScalar: + case BufferPackingHLSLCbuffer: + return false; + + default: + return true; + } +} + +static bool packing_is_scalar(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingScalar: + case BufferPackingScalarEnhancedLayout: + return true; + + default: + return false; + } +} + +static BufferPackingStandard packing_to_substruct_packing(BufferPackingStandard packing) +{ + switch (packing) + { + case BufferPackingStd140EnhancedLayout: + return BufferPackingStd140; + case BufferPackingStd430EnhancedLayout: + return BufferPackingStd430; + case BufferPackingHLSLCbufferPackOffset: + return BufferPackingHLSLCbuffer; + case BufferPackingScalarEnhancedLayout: + return BufferPackingScalar; + default: + return packing; + } +} + +void CompilerGLSL::init() +{ + if (ir.source.known) + { + options.es = ir.source.es; + options.version = ir.source.version; + } + + // Query the locale to see what the decimal point is. + // We'll rely on fixing it up ourselves in the rare case we have a comma-as-decimal locale + // rather than setting locales ourselves. Settings locales in a safe and isolated way is rather + // tricky. +#ifdef _WIN32 + // On Windows, localeconv uses thread-local storage, so it should be fine. + const struct lconv *conv = localeconv(); + if (conv && conv->decimal_point) + current_locale_radix_character = *conv->decimal_point; +#elif defined(__ANDROID__) && __ANDROID_API__ < 26 + // nl_langinfo is not supported on this platform, fall back to the worse alternative. + const struct lconv *conv = localeconv(); + if (conv && conv->decimal_point) + current_locale_radix_character = *conv->decimal_point; +#else + // localeconv, the portable function is not MT safe ... + const char *decimal_point = nl_langinfo(RADIXCHAR); + if (decimal_point && *decimal_point != '\0') + current_locale_radix_character = *decimal_point; +#endif +} + +static const char *to_pls_layout(PlsFormat format) +{ + switch (format) + { + case PlsR11FG11FB10F: + return "layout(r11f_g11f_b10f) "; + case PlsR32F: + return "layout(r32f) "; + case PlsRG16F: + return "layout(rg16f) "; + case PlsRGB10A2: + return "layout(rgb10_a2) "; + case PlsRGBA8: + return "layout(rgba8) "; + case PlsRG16: + return "layout(rg16) "; + case PlsRGBA8I: + return "layout(rgba8i)"; + case PlsRG16I: + return "layout(rg16i) "; + case PlsRGB10A2UI: + return "layout(rgb10_a2ui) "; + case PlsRGBA8UI: + return "layout(rgba8ui) "; + case PlsRG16UI: + return "layout(rg16ui) "; + case PlsR32UI: + return "layout(r32ui) "; + default: + return ""; + } +} + +static std::pair<spv::Op, SPIRType::BaseType> pls_format_to_basetype(PlsFormat format) +{ + switch (format) + { + default: + case PlsR11FG11FB10F: + case PlsR32F: + case PlsRG16F: + case PlsRGB10A2: + case PlsRGBA8: + case PlsRG16: + return std::make_pair(spv::OpTypeFloat, SPIRType::Float); + + case PlsRGBA8I: + case PlsRG16I: + return std::make_pair(spv::OpTypeInt, SPIRType::Int); + + case PlsRGB10A2UI: + case PlsRGBA8UI: + case PlsRG16UI: + case PlsR32UI: + return std::make_pair(spv::OpTypeInt, SPIRType::UInt); + } +} + +static uint32_t pls_format_to_components(PlsFormat format) +{ + switch (format) + { + default: + case PlsR32F: + case PlsR32UI: + return 1; + + case PlsRG16F: + case PlsRG16: + case PlsRG16UI: + case PlsRG16I: + return 2; + + case PlsR11FG11FB10F: + return 3; + + case PlsRGB10A2: + case PlsRGBA8: + case PlsRGBA8I: + case PlsRGB10A2UI: + case PlsRGBA8UI: + return 4; + } +} + +const char *CompilerGLSL::vector_swizzle(int vecsize, int index) +{ + static const char *const swizzle[4][4] = { + { ".x", ".y", ".z", ".w" }, + { ".xy", ".yz", ".zw", nullptr }, + { ".xyz", ".yzw", nullptr, nullptr }, +#if defined(__GNUC__) && (__GNUC__ == 9) + // This works around a GCC 9 bug, see details in https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90947. + // This array ends up being compiled as all nullptrs, tripping the assertions below. + { "", nullptr, nullptr, "$" }, +#else + { "", nullptr, nullptr, nullptr }, +#endif + }; + + assert(vecsize >= 1 && vecsize <= 4); + assert(index >= 0 && index < 4); + assert(swizzle[vecsize - 1][index]); + + return swizzle[vecsize - 1][index]; +} + +void CompilerGLSL::reset(uint32_t iteration_count) +{ + // Sanity check the iteration count to be robust against a certain class of bugs where + // we keep forcing recompilations without making clear forward progress. + // In buggy situations we will loop forever, or loop for an unbounded number of iterations. + // Certain types of recompilations are considered to make forward progress, + // but in almost all situations, we'll never see more than 3 iterations. + // It is highly context-sensitive when we need to force recompilation, + // and it is not practical with the current architecture + // to resolve everything up front. + if (iteration_count >= options.force_recompile_max_debug_iterations && !is_force_recompile_forward_progress) + SPIRV_CROSS_THROW("Maximum compilation loops detected and no forward progress was made. Must be a SPIRV-Cross bug!"); + + // We do some speculative optimizations which should pretty much always work out, + // but just in case the SPIR-V is rather weird, recompile until it's happy. + // This typically only means one extra pass. + clear_force_recompile(); + + // Clear invalid expression tracking. + invalid_expressions.clear(); + composite_insert_overwritten.clear(); + current_function = nullptr; + + // Clear temporary usage tracking. + expression_usage_counts.clear(); + forwarded_temporaries.clear(); + suppressed_usage_tracking.clear(); + + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + flushed_phi_variables.clear(); + + current_emitting_switch_stack.clear(); + + reset_name_caches(); + + ir.for_each_typed_id<SPIRFunction>([&](uint32_t, SPIRFunction &func) { + func.active = false; + func.flush_undeclared = true; + }); + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { var.dependees.clear(); }); + + ir.reset_all_of_type<SPIRExpression>(); + ir.reset_all_of_type<SPIRAccessChain>(); + + statement_count = 0; + indent = 0; + current_loop_level = 0; +} + +void CompilerGLSL::remap_pls_variables() +{ + for (auto &input : pls_inputs) + { + auto &var = get<SPIRVariable>(input.id); + + bool input_is_target = false; + if (var.storage == StorageClassUniformConstant) + { + auto &type = get<SPIRType>(var.basetype); + input_is_target = type.image.dim == DimSubpassData; + } + + if (var.storage != StorageClassInput && !input_is_target) + SPIRV_CROSS_THROW("Can only use in and target variables for PLS inputs."); + var.remapped_variable = true; + } + + for (auto &output : pls_outputs) + { + auto &var = get<SPIRVariable>(output.id); + if (var.storage != StorageClassOutput) + SPIRV_CROSS_THROW("Can only use out variables for PLS outputs."); + var.remapped_variable = true; + } +} + +void CompilerGLSL::remap_ext_framebuffer_fetch(uint32_t input_attachment_index, uint32_t color_location, bool coherent) +{ + subpass_to_framebuffer_fetch_attachment.push_back({ input_attachment_index, color_location }); + inout_color_attachments.push_back({ color_location, coherent }); +} + +bool CompilerGLSL::location_is_framebuffer_fetch(uint32_t location) const +{ + return std::find_if(begin(inout_color_attachments), end(inout_color_attachments), + [&](const std::pair<uint32_t, bool> &elem) { + return elem.first == location; + }) != end(inout_color_attachments); +} + +bool CompilerGLSL::location_is_non_coherent_framebuffer_fetch(uint32_t location) const +{ + return std::find_if(begin(inout_color_attachments), end(inout_color_attachments), + [&](const std::pair<uint32_t, bool> &elem) { + return elem.first == location && !elem.second; + }) != end(inout_color_attachments); +} + +void CompilerGLSL::find_static_extensions() +{ + ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) { + if (type.basetype == SPIRType::Double) + { + if (options.es) + SPIRV_CROSS_THROW("FP64 not supported in ES profile."); + if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_gpu_shader_fp64"); + } + else if (type.basetype == SPIRType::Int64 || type.basetype == SPIRType::UInt64) + { + if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310. + SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); + require_extension_internal("GL_ARB_gpu_shader_int64"); + } + else if (type.basetype == SPIRType::Half) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_float16"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_16bit_storage"); + } + else if (type.basetype == SPIRType::SByte || type.basetype == SPIRType::UByte) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int8"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_8bit_storage"); + } + else if (type.basetype == SPIRType::Short || type.basetype == SPIRType::UShort) + { + require_extension_internal("GL_EXT_shader_explicit_arithmetic_types_int16"); + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_shader_16bit_storage"); + } + }); + + auto &execution = get_entry_point(); + switch (execution.model) + { + case ExecutionModelGLCompute: + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_compute_shader"); + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for compute shaders."); + break; + + case ExecutionModelGeometry: + if (options.es && options.version < 320) + require_extension_internal("GL_EXT_geometry_shader"); + if (!options.es && options.version < 150) + require_extension_internal("GL_ARB_geometry_shader4"); + + if (execution.flags.get(ExecutionModeInvocations) && execution.invocations != 1) + { + // Instanced GS is part of 400 core or this extension. + if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_gpu_shader5"); + } + break; + + case ExecutionModelTessellationEvaluation: + case ExecutionModelTessellationControl: + if (options.es && options.version < 320) + require_extension_internal("GL_EXT_tessellation_shader"); + if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_tessellation_shader"); + break; + + case ExecutionModelRayGenerationKHR: + case ExecutionModelIntersectionKHR: + case ExecutionModelAnyHitKHR: + case ExecutionModelClosestHitKHR: + case ExecutionModelMissKHR: + case ExecutionModelCallableKHR: + // NV enums are aliases. + if (options.es || options.version < 460) + SPIRV_CROSS_THROW("Ray tracing shaders require non-es profile with version 460 or above."); + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Ray tracing requires Vulkan semantics."); + + // Need to figure out if we should target KHR or NV extension based on capabilities. + for (auto &cap : ir.declared_capabilities) + { + if (cap == CapabilityRayTracingKHR || cap == CapabilityRayQueryKHR || + cap == CapabilityRayTraversalPrimitiveCullingKHR) + { + ray_tracing_is_khr = true; + break; + } + } + + if (ray_tracing_is_khr) + { + // In KHR ray tracing we pass payloads by pointer instead of location, + // so make sure we assign locations properly. + ray_tracing_khr_fixup_locations(); + require_extension_internal("GL_EXT_ray_tracing"); + } + else + require_extension_internal("GL_NV_ray_tracing"); + break; + + case ExecutionModelMeshEXT: + case ExecutionModelTaskEXT: + if (options.es || options.version < 450) + SPIRV_CROSS_THROW("Mesh shaders require GLSL 450 or above."); + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Mesh shaders require Vulkan semantics."); + require_extension_internal("GL_EXT_mesh_shader"); + break; + + default: + break; + } + + if (!pls_inputs.empty() || !pls_outputs.empty()) + { + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Can only use GL_EXT_shader_pixel_local_storage in fragment shaders."); + require_extension_internal("GL_EXT_shader_pixel_local_storage"); + } + + if (!inout_color_attachments.empty()) + { + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Can only use GL_EXT_shader_framebuffer_fetch in fragment shaders."); + if (options.vulkan_semantics) + SPIRV_CROSS_THROW("Cannot use EXT_shader_framebuffer_fetch in Vulkan GLSL."); + + bool has_coherent = false; + bool has_incoherent = false; + + for (auto &att : inout_color_attachments) + { + if (att.second) + has_coherent = true; + else + has_incoherent = true; + } + + if (has_coherent) + require_extension_internal("GL_EXT_shader_framebuffer_fetch"); + if (has_incoherent) + require_extension_internal("GL_EXT_shader_framebuffer_fetch_non_coherent"); + } + + if (options.separate_shader_objects && !options.es && options.version < 410) + require_extension_internal("GL_ARB_separate_shader_objects"); + + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_buffer_reference is only supported in Vulkan GLSL."); + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("GL_EXT_buffer_reference requires GLSL 450."); + require_extension_internal("GL_EXT_buffer_reference2"); + } + else if (ir.addressing_model != AddressingModelLogical) + { + SPIRV_CROSS_THROW("Only Logical and PhysicalStorageBuffer64EXT addressing models are supported."); + } + + // Check for nonuniform qualifier and passthrough. + // Instead of looping over all decorations to find this, just look at capabilities. + for (auto &cap : ir.declared_capabilities) + { + switch (cap) + { + case CapabilityShaderNonUniformEXT: + if (!options.vulkan_semantics) + require_extension_internal("GL_NV_gpu_shader5"); + else + require_extension_internal("GL_EXT_nonuniform_qualifier"); + break; + case CapabilityRuntimeDescriptorArrayEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_nonuniform_qualifier is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_nonuniform_qualifier"); + break; + + case CapabilityGeometryShaderPassthroughNV: + if (execution.model == ExecutionModelGeometry) + { + require_extension_internal("GL_NV_geometry_shader_passthrough"); + execution.geometry_passthrough = true; + } + break; + + case CapabilityVariablePointers: + case CapabilityVariablePointersStorageBuffer: + SPIRV_CROSS_THROW("VariablePointers capability is not supported in GLSL."); + + case CapabilityMultiView: + if (options.vulkan_semantics) + require_extension_internal("GL_EXT_multiview"); + else + { + require_extension_internal("GL_OVR_multiview2"); + if (options.ovr_multiview_view_count == 0) + SPIRV_CROSS_THROW("ovr_multiview_view_count must be non-zero when using GL_OVR_multiview2."); + if (get_execution_model() != ExecutionModelVertex) + SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders."); + } + break; + + case CapabilityRayQueryKHR: + if (options.es || options.version < 460 || !options.vulkan_semantics) + SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460."); + require_extension_internal("GL_EXT_ray_query"); + ray_tracing_is_khr = true; + break; + + case CapabilityRayTraversalPrimitiveCullingKHR: + if (options.es || options.version < 460 || !options.vulkan_semantics) + SPIRV_CROSS_THROW("RayQuery requires Vulkan GLSL 460."); + require_extension_internal("GL_EXT_ray_flags_primitive_culling"); + ray_tracing_is_khr = true; + break; + + default: + break; + } + } + + if (options.ovr_multiview_view_count) + { + if (options.vulkan_semantics) + SPIRV_CROSS_THROW("OVR_multiview2 cannot be used with Vulkan semantics."); + if (get_execution_model() != ExecutionModelVertex) + SPIRV_CROSS_THROW("OVR_multiview2 can only be used with Vertex shaders."); + require_extension_internal("GL_OVR_multiview2"); + } + + // KHR one is likely to get promoted at some point, so if we don't see an explicit SPIR-V extension, assume KHR. + for (auto &ext : ir.declared_extensions) + if (ext == "SPV_NV_fragment_shader_barycentric") + barycentric_is_nv = true; +} + +void CompilerGLSL::require_polyfill(Polyfill polyfill, bool relaxed) +{ + uint32_t &polyfills = (relaxed && (options.es || options.vulkan_semantics)) ? + required_polyfills_relaxed : required_polyfills; + + if ((polyfills & polyfill) == 0) + { + polyfills |= polyfill; + force_recompile(); + } +} + +void CompilerGLSL::ray_tracing_khr_fixup_locations() +{ + uint32_t location = 0; + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + // Incoming payload storage can also be used for tracing. + if (var.storage != StorageClassRayPayloadKHR && var.storage != StorageClassCallableDataKHR && + var.storage != StorageClassIncomingRayPayloadKHR && var.storage != StorageClassIncomingCallableDataKHR) + return; + if (is_hidden_variable(var)) + return; + set_decoration(var.self, DecorationLocation, location++); + }); +} + +string CompilerGLSL::compile() +{ + ir.fixup_reserved_names(); + + if (!options.vulkan_semantics) + { + // only NV_gpu_shader5 supports divergent indexing on OpenGL, and it does so without extra qualifiers + backend.nonuniform_qualifier = ""; + backend.needs_row_major_load_workaround = options.enable_row_major_load_workaround; + } + backend.allow_precision_qualifiers = options.vulkan_semantics || options.es; + backend.force_gl_in_out_block = true; + backend.supports_extensions = true; + backend.use_array_constructor = true; + backend.workgroup_size_is_hidden = true; + backend.requires_relaxed_precision_analysis = options.es || options.vulkan_semantics; + backend.support_precise_qualifier = + (!options.es && options.version >= 400) || (options.es && options.version >= 320); + + if (is_legacy_es()) + backend.support_case_fallthrough = false; + + // Scan the SPIR-V to find trivial uses of extensions. + fixup_anonymous_struct_names(); + fixup_type_alias(); + reorder_type_alias(); + build_function_control_flow_graphs_and_analyze(); + find_static_extensions(); + fixup_image_load_store_access(); + update_active_builtins(); + analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); + if (!inout_color_attachments.empty()) + emit_inout_fragment_outputs_copy_to_subpass_inputs(); + + // Shaders might cast unrelated data to pointers of non-block types. + // Find all such instances and make sure we can cast the pointers to a synthesized block type. + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + analyze_non_block_pointer_types(); + + uint32_t pass_count = 0; + do + { + reset(pass_count); + + buffer.reset(); + + emit_header(); + emit_resources(); + emit_extension_workarounds(get_execution_model()); + + if (required_polyfills != 0) + emit_polyfills(required_polyfills, false); + if ((options.es || options.vulkan_semantics) && required_polyfills_relaxed != 0) + emit_polyfills(required_polyfills_relaxed, true); + + emit_function(get<SPIRFunction>(ir.default_entry_point), Bitset()); + + pass_count++; + } while (is_forcing_recompilation()); + + // Implement the interlocked wrapper function at the end. + // The body was implemented in lieu of main(). + if (interlocked_is_complex) + { + statement("void main()"); + begin_scope(); + statement("// Interlocks were used in a way not compatible with GLSL, this is very slow."); + statement("SPIRV_Cross_beginInvocationInterlock();"); + statement("spvMainInterlockedBody();"); + statement("SPIRV_Cross_endInvocationInterlock();"); + end_scope(); + } + + // Entry point in GLSL is always main(). + get_entry_point().name = "main"; + + return buffer.str(); +} + +std::string CompilerGLSL::get_partial_source() +{ + return buffer.str(); +} + +void CompilerGLSL::build_workgroup_size(SmallVector<string> &arguments, const SpecializationConstant &wg_x, + const SpecializationConstant &wg_y, const SpecializationConstant &wg_z) +{ + auto &execution = get_entry_point(); + bool builtin_workgroup = execution.workgroup_size.constant != 0; + bool use_local_size_id = !builtin_workgroup && execution.flags.get(ExecutionModeLocalSizeId); + + if (wg_x.id) + { + if (options.vulkan_semantics) + arguments.push_back(join("local_size_x_id = ", wg_x.constant_id)); + else + arguments.push_back(join("local_size_x = ", get<SPIRConstant>(wg_x.id).specialization_constant_macro_name)); + } + else if (use_local_size_id && execution.workgroup_size.id_x) + arguments.push_back(join("local_size_x = ", get<SPIRConstant>(execution.workgroup_size.id_x).scalar())); + else + arguments.push_back(join("local_size_x = ", execution.workgroup_size.x)); + + if (wg_y.id) + { + if (options.vulkan_semantics) + arguments.push_back(join("local_size_y_id = ", wg_y.constant_id)); + else + arguments.push_back(join("local_size_y = ", get<SPIRConstant>(wg_y.id).specialization_constant_macro_name)); + } + else if (use_local_size_id && execution.workgroup_size.id_y) + arguments.push_back(join("local_size_y = ", get<SPIRConstant>(execution.workgroup_size.id_y).scalar())); + else + arguments.push_back(join("local_size_y = ", execution.workgroup_size.y)); + + if (wg_z.id) + { + if (options.vulkan_semantics) + arguments.push_back(join("local_size_z_id = ", wg_z.constant_id)); + else + arguments.push_back(join("local_size_z = ", get<SPIRConstant>(wg_z.id).specialization_constant_macro_name)); + } + else if (use_local_size_id && execution.workgroup_size.id_z) + arguments.push_back(join("local_size_z = ", get<SPIRConstant>(execution.workgroup_size.id_z).scalar())); + else + arguments.push_back(join("local_size_z = ", execution.workgroup_size.z)); +} + +void CompilerGLSL::request_subgroup_feature(ShaderSubgroupSupportHelper::Feature feature) +{ + if (options.vulkan_semantics) + { + auto khr_extension = ShaderSubgroupSupportHelper::get_KHR_extension_for_feature(feature); + require_extension_internal(ShaderSubgroupSupportHelper::get_extension_name(khr_extension)); + } + else + { + if (!shader_subgroup_supporter.is_feature_requested(feature)) + force_recompile(); + shader_subgroup_supporter.request_feature(feature); + } +} + +void CompilerGLSL::emit_header() +{ + auto &execution = get_entry_point(); + statement("#version ", options.version, options.es && options.version > 100 ? " es" : ""); + + if (!options.es && options.version < 420) + { + // Needed for binding = # on UBOs, etc. + if (options.enable_420pack_extension) + { + statement("#ifdef GL_ARB_shading_language_420pack"); + statement("#extension GL_ARB_shading_language_420pack : require"); + statement("#endif"); + } + // Needed for: layout(early_fragment_tests) in; + if (execution.flags.get(ExecutionModeEarlyFragmentTests)) + require_extension_internal("GL_ARB_shader_image_load_store"); + } + + // Needed for: layout(post_depth_coverage) in; + if (execution.flags.get(ExecutionModePostDepthCoverage)) + require_extension_internal("GL_ARB_post_depth_coverage"); + + // Needed for: layout({pixel,sample}_interlock_[un]ordered) in; + bool interlock_used = execution.flags.get(ExecutionModePixelInterlockOrderedEXT) || + execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT); + + if (interlock_used) + { + if (options.es) + { + if (options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock."); + require_extension_internal("GL_NV_fragment_shader_interlock"); + } + else + { + if (options.version < 420) + require_extension_internal("GL_ARB_shader_image_load_store"); + require_extension_internal("GL_ARB_fragment_shader_interlock"); + } + } + + for (auto &ext : forced_extensions) + { + if (ext == "GL_ARB_gpu_shader_int64") + { + statement("#if defined(GL_ARB_gpu_shader_int64)"); + statement("#extension GL_ARB_gpu_shader_int64 : require"); + if (!options.vulkan_semantics || options.es) + { + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + } + statement("#else"); + statement("#error No extension available for 64-bit integers."); + statement("#endif"); + } + else if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") + { + // Special case, this extension has a potential fallback to another vendor extension in normal GLSL. + // GL_AMD_gpu_shader_half_float is a superset, so try that first. + statement("#if defined(GL_AMD_gpu_shader_half_float)"); + statement("#extension GL_AMD_gpu_shader_half_float : require"); + if (!options.vulkan_semantics) + { + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + } + else + { + statement("#elif defined(GL_EXT_shader_explicit_arithmetic_types_float16)"); + statement("#extension GL_EXT_shader_explicit_arithmetic_types_float16 : require"); + } + statement("#else"); + statement("#error No extension available for FP16."); + statement("#endif"); + } + else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int8") + { + if (options.vulkan_semantics) + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require"); + else + { + statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int8)"); + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require"); + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + statement("#else"); + statement("#error No extension available for Int8."); + statement("#endif"); + } + } + else if (ext == "GL_EXT_shader_explicit_arithmetic_types_int16") + { + if (options.vulkan_semantics) + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); + else + { + statement("#if defined(GL_EXT_shader_explicit_arithmetic_types_int16)"); + statement("#extension GL_EXT_shader_explicit_arithmetic_types_int16 : require"); + statement("#elif defined(GL_AMD_gpu_shader_int16)"); + statement("#extension GL_AMD_gpu_shader_int16 : require"); + statement("#elif defined(GL_NV_gpu_shader5)"); + statement("#extension GL_NV_gpu_shader5 : require"); + statement("#else"); + statement("#error No extension available for Int16."); + statement("#endif"); + } + } + else if (ext == "GL_ARB_post_depth_coverage") + { + if (options.es) + statement("#extension GL_EXT_post_depth_coverage : require"); + else + { + statement("#if defined(GL_ARB_post_depth_coverge)"); + statement("#extension GL_ARB_post_depth_coverage : require"); + statement("#else"); + statement("#extension GL_EXT_post_depth_coverage : require"); + statement("#endif"); + } + } + else if (!options.vulkan_semantics && ext == "GL_ARB_shader_draw_parameters") + { + // Soft-enable this extension on plain GLSL. + statement("#ifdef ", ext); + statement("#extension ", ext, " : enable"); + statement("#endif"); + } + else if (ext == "GL_EXT_control_flow_attributes") + { + // These are just hints so we can conditionally enable and fallback in the shader. + statement("#if defined(GL_EXT_control_flow_attributes)"); + statement("#extension GL_EXT_control_flow_attributes : require"); + statement("#define SPIRV_CROSS_FLATTEN [[flatten]]"); + statement("#define SPIRV_CROSS_BRANCH [[dont_flatten]]"); + statement("#define SPIRV_CROSS_UNROLL [[unroll]]"); + statement("#define SPIRV_CROSS_LOOP [[dont_unroll]]"); + statement("#else"); + statement("#define SPIRV_CROSS_FLATTEN"); + statement("#define SPIRV_CROSS_BRANCH"); + statement("#define SPIRV_CROSS_UNROLL"); + statement("#define SPIRV_CROSS_LOOP"); + statement("#endif"); + } + else if (ext == "GL_NV_fragment_shader_interlock") + { + statement("#extension GL_NV_fragment_shader_interlock : require"); + statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockNV()"); + statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockNV()"); + } + else if (ext == "GL_ARB_fragment_shader_interlock") + { + statement("#ifdef GL_ARB_fragment_shader_interlock"); + statement("#extension GL_ARB_fragment_shader_interlock : enable"); + statement("#define SPIRV_Cross_beginInvocationInterlock() beginInvocationInterlockARB()"); + statement("#define SPIRV_Cross_endInvocationInterlock() endInvocationInterlockARB()"); + statement("#elif defined(GL_INTEL_fragment_shader_ordering)"); + statement("#extension GL_INTEL_fragment_shader_ordering : enable"); + statement("#define SPIRV_Cross_beginInvocationInterlock() beginFragmentShaderOrderingINTEL()"); + statement("#define SPIRV_Cross_endInvocationInterlock()"); + statement("#endif"); + } + else + statement("#extension ", ext, " : require"); + } + + if (!options.vulkan_semantics) + { + using Supp = ShaderSubgroupSupportHelper; + auto result = shader_subgroup_supporter.resolve(); + + for (uint32_t feature_index = 0; feature_index < Supp::FeatureCount; feature_index++) + { + auto feature = static_cast<Supp::Feature>(feature_index); + if (!shader_subgroup_supporter.is_feature_requested(feature)) + continue; + + auto exts = Supp::get_candidates_for_feature(feature, result); + if (exts.empty()) + continue; + + statement(""); + + for (auto &ext : exts) + { + const char *name = Supp::get_extension_name(ext); + const char *extra_predicate = Supp::get_extra_required_extension_predicate(ext); + auto extra_names = Supp::get_extra_required_extension_names(ext); + statement(&ext != &exts.front() ? "#elif" : "#if", " defined(", name, ")", + (*extra_predicate != '\0' ? " && " : ""), extra_predicate); + for (const auto &e : extra_names) + statement("#extension ", e, " : enable"); + statement("#extension ", name, " : require"); + } + + if (!Supp::can_feature_be_implemented_without_extensions(feature)) + { + statement("#else"); + statement("#error No extensions available to emulate requested subgroup feature."); + } + + statement("#endif"); + } + } + + for (auto &header : header_lines) + statement(header); + + SmallVector<string> inputs; + SmallVector<string> outputs; + + switch (execution.model) + { + case ExecutionModelVertex: + if (options.ovr_multiview_view_count) + inputs.push_back(join("num_views = ", options.ovr_multiview_view_count)); + break; + case ExecutionModelGeometry: + if ((execution.flags.get(ExecutionModeInvocations)) && execution.invocations != 1) + inputs.push_back(join("invocations = ", execution.invocations)); + if (execution.flags.get(ExecutionModeInputPoints)) + inputs.push_back("points"); + if (execution.flags.get(ExecutionModeInputLines)) + inputs.push_back("lines"); + if (execution.flags.get(ExecutionModeInputLinesAdjacency)) + inputs.push_back("lines_adjacency"); + if (execution.flags.get(ExecutionModeTriangles)) + inputs.push_back("triangles"); + if (execution.flags.get(ExecutionModeInputTrianglesAdjacency)) + inputs.push_back("triangles_adjacency"); + + if (!execution.geometry_passthrough) + { + // For passthrough, these are implies and cannot be declared in shader. + outputs.push_back(join("max_vertices = ", execution.output_vertices)); + if (execution.flags.get(ExecutionModeOutputTriangleStrip)) + outputs.push_back("triangle_strip"); + if (execution.flags.get(ExecutionModeOutputPoints)) + outputs.push_back("points"); + if (execution.flags.get(ExecutionModeOutputLineStrip)) + outputs.push_back("line_strip"); + } + break; + + case ExecutionModelTessellationControl: + if (execution.flags.get(ExecutionModeOutputVertices)) + outputs.push_back(join("vertices = ", execution.output_vertices)); + break; + + case ExecutionModelTessellationEvaluation: + if (execution.flags.get(ExecutionModeQuads)) + inputs.push_back("quads"); + if (execution.flags.get(ExecutionModeTriangles)) + inputs.push_back("triangles"); + if (execution.flags.get(ExecutionModeIsolines)) + inputs.push_back("isolines"); + if (execution.flags.get(ExecutionModePointMode)) + inputs.push_back("point_mode"); + + if (!execution.flags.get(ExecutionModeIsolines)) + { + if (execution.flags.get(ExecutionModeVertexOrderCw)) + inputs.push_back("cw"); + if (execution.flags.get(ExecutionModeVertexOrderCcw)) + inputs.push_back("ccw"); + } + + if (execution.flags.get(ExecutionModeSpacingFractionalEven)) + inputs.push_back("fractional_even_spacing"); + if (execution.flags.get(ExecutionModeSpacingFractionalOdd)) + inputs.push_back("fractional_odd_spacing"); + if (execution.flags.get(ExecutionModeSpacingEqual)) + inputs.push_back("equal_spacing"); + break; + + case ExecutionModelGLCompute: + case ExecutionModelTaskEXT: + case ExecutionModelMeshEXT: + { + if (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId)) + { + SpecializationConstant wg_x, wg_y, wg_z; + get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + // If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro + // declarations before we can emit the work group size. + if (options.vulkan_semantics || + ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0)))) + build_workgroup_size(inputs, wg_x, wg_y, wg_z); + } + else + { + inputs.push_back(join("local_size_x = ", execution.workgroup_size.x)); + inputs.push_back(join("local_size_y = ", execution.workgroup_size.y)); + inputs.push_back(join("local_size_z = ", execution.workgroup_size.z)); + } + + if (execution.model == ExecutionModelMeshEXT) + { + outputs.push_back(join("max_vertices = ", execution.output_vertices)); + outputs.push_back(join("max_primitives = ", execution.output_primitives)); + if (execution.flags.get(ExecutionModeOutputTrianglesEXT)) + outputs.push_back("triangles"); + else if (execution.flags.get(ExecutionModeOutputLinesEXT)) + outputs.push_back("lines"); + else if (execution.flags.get(ExecutionModeOutputPoints)) + outputs.push_back("points"); + } + break; + } + + case ExecutionModelFragment: + if (options.es) + { + switch (options.fragment.default_float_precision) + { + case Options::Lowp: + statement("precision lowp float;"); + break; + + case Options::Mediump: + statement("precision mediump float;"); + break; + + case Options::Highp: + statement("precision highp float;"); + break; + + default: + break; + } + + switch (options.fragment.default_int_precision) + { + case Options::Lowp: + statement("precision lowp int;"); + break; + + case Options::Mediump: + statement("precision mediump int;"); + break; + + case Options::Highp: + statement("precision highp int;"); + break; + + default: + break; + } + } + + if (execution.flags.get(ExecutionModeEarlyFragmentTests)) + inputs.push_back("early_fragment_tests"); + if (execution.flags.get(ExecutionModePostDepthCoverage)) + inputs.push_back("post_depth_coverage"); + + if (interlock_used) + statement("#if defined(GL_ARB_fragment_shader_interlock)"); + + if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT)) + statement("layout(pixel_interlock_ordered) in;"); + else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT)) + statement("layout(pixel_interlock_unordered) in;"); + else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT)) + statement("layout(sample_interlock_ordered) in;"); + else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) + statement("layout(sample_interlock_unordered) in;"); + + if (interlock_used) + { + statement("#elif !defined(GL_INTEL_fragment_shader_ordering)"); + statement("#error Fragment Shader Interlock/Ordering extension missing!"); + statement("#endif"); + } + + if (!options.es && execution.flags.get(ExecutionModeDepthGreater)) + statement("layout(depth_greater) out float gl_FragDepth;"); + else if (!options.es && execution.flags.get(ExecutionModeDepthLess)) + statement("layout(depth_less) out float gl_FragDepth;"); + + break; + + default: + break; + } + + for (auto &cap : ir.declared_capabilities) + if (cap == CapabilityRayTraversalPrimitiveCullingKHR) + statement("layout(primitive_culling);"); + + if (!inputs.empty()) + statement("layout(", merge(inputs), ") in;"); + if (!outputs.empty()) + statement("layout(", merge(outputs), ") out;"); + + statement(""); +} + +bool CompilerGLSL::type_is_empty(const SPIRType &type) +{ + return type.basetype == SPIRType::Struct && type.member_types.empty(); +} + +void CompilerGLSL::emit_struct(SPIRType &type) +{ + // Struct types can be stamped out multiple times + // with just different offsets, matrix layouts, etc ... + // Type-punning with these types is legal, which complicates things + // when we are storing struct and array types in an SSBO for example. + // If the type master is packed however, we can no longer assume that the struct declaration will be redundant. + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + return; + + add_resource_name(type.self); + auto name = type_to_glsl(type); + + statement(!backend.explicit_struct_type ? "struct " : "", name); + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + bool emitted = false; + for (auto &member : type.member_types) + { + add_member_name(type, i); + emit_struct_member(type, member, i); + i++; + emitted = true; + } + + // Don't declare empty structs in GLSL, this is not allowed. + if (type_is_empty(type) && !backend.supports_empty_struct) + { + statement("int empty_struct_member;"); + emitted = true; + } + + if (has_extended_decoration(type.self, SPIRVCrossDecorationPaddingTarget)) + emit_struct_padding_target(type); + + end_scope_decl(); + + if (emitted) + statement(""); +} + +string CompilerGLSL::to_interpolation_qualifiers(const Bitset &flags) +{ + string res; + //if (flags & (1ull << DecorationSmooth)) + // res += "smooth "; + if (flags.get(DecorationFlat)) + res += "flat "; + if (flags.get(DecorationNoPerspective)) + { + if (options.es) + { + if (options.version < 300) + SPIRV_CROSS_THROW("noperspective requires ESSL 300."); + require_extension_internal("GL_NV_shader_noperspective_interpolation"); + } + else if (is_legacy_desktop()) + require_extension_internal("GL_EXT_gpu_shader4"); + res += "noperspective "; + } + if (flags.get(DecorationCentroid)) + res += "centroid "; + if (flags.get(DecorationPatch)) + res += "patch "; + if (flags.get(DecorationSample)) + { + if (options.es) + { + if (options.version < 300) + SPIRV_CROSS_THROW("sample requires ESSL 300."); + else if (options.version < 320) + require_extension_internal("GL_OES_shader_multisample_interpolation"); + } + res += "sample "; + } + if (flags.get(DecorationInvariant) && (options.es || options.version >= 120)) + res += "invariant "; + if (flags.get(DecorationPerPrimitiveEXT)) + { + res += "perprimitiveEXT "; + require_extension_internal("GL_EXT_mesh_shader"); + } + + if (flags.get(DecorationExplicitInterpAMD)) + { + require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); + res += "__explicitInterpAMD "; + } + + if (flags.get(DecorationPerVertexKHR)) + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("pervertexEXT requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("pervertexEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + res += "pervertexNV "; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + res += "pervertexEXT "; + } + } + + return res; +} + +string CompilerGLSL::layout_for_member(const SPIRType &type, uint32_t index) +{ + if (is_legacy()) + return ""; + + bool is_block = has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + if (!is_block) + return ""; + + auto &memb = ir.meta[type.self].members; + if (index >= memb.size()) + return ""; + auto &dec = memb[index]; + + SmallVector<string> attr; + + if (has_member_decoration(type.self, index, DecorationPassthroughNV)) + attr.push_back("passthrough"); + + // We can only apply layouts on members in block interfaces. + // This is a bit problematic because in SPIR-V decorations are applied on the struct types directly. + // This is not supported on GLSL, so we have to make the assumption that if a struct within our buffer block struct + // has a decoration, it was originally caused by a top-level layout() qualifier in GLSL. + // + // We would like to go from (SPIR-V style): + // + // struct Foo { layout(row_major) mat4 matrix; }; + // buffer UBO { Foo foo; }; + // + // to + // + // struct Foo { mat4 matrix; }; // GLSL doesn't support any layout shenanigans in raw struct declarations. + // buffer UBO { layout(row_major) Foo foo; }; // Apply the layout on top-level. + auto flags = combined_decoration_for_member(type, index); + + if (flags.get(DecorationRowMajor)) + attr.push_back("row_major"); + // We don't emit any global layouts, so column_major is default. + //if (flags & (1ull << DecorationColMajor)) + // attr.push_back("column_major"); + + if (dec.decoration_flags.get(DecorationLocation) && can_use_io_location(type.storage, true)) + attr.push_back(join("location = ", dec.location)); + + // Can only declare component if we can declare location. + if (dec.decoration_flags.get(DecorationComponent) && can_use_io_location(type.storage, true)) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); + attr.push_back(join("component = ", dec.component)); + } + else + SPIRV_CROSS_THROW("Component decoration is not supported in ES targets."); + } + + // SPIRVCrossDecorationPacked is set by layout_for_variable earlier to mark that we need to emit offset qualifiers. + // This is only done selectively in GLSL as needed. + if (has_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset) && + dec.decoration_flags.get(DecorationOffset)) + attr.push_back(join("offset = ", dec.offset)); + else if (type.storage == StorageClassOutput && dec.decoration_flags.get(DecorationOffset)) + attr.push_back(join("xfb_offset = ", dec.offset)); + + if (attr.empty()) + return ""; + + string res = "layout("; + res += merge(attr); + res += ") "; + return res; +} + +const char *CompilerGLSL::format_to_glsl(spv::ImageFormat format) +{ + if (options.es && is_desktop_only_format(format)) + SPIRV_CROSS_THROW("Attempting to use image format not supported in ES profile."); + + switch (format) + { + case ImageFormatRgba32f: + return "rgba32f"; + case ImageFormatRgba16f: + return "rgba16f"; + case ImageFormatR32f: + return "r32f"; + case ImageFormatRgba8: + return "rgba8"; + case ImageFormatRgba8Snorm: + return "rgba8_snorm"; + case ImageFormatRg32f: + return "rg32f"; + case ImageFormatRg16f: + return "rg16f"; + case ImageFormatRgba32i: + return "rgba32i"; + case ImageFormatRgba16i: + return "rgba16i"; + case ImageFormatR32i: + return "r32i"; + case ImageFormatRgba8i: + return "rgba8i"; + case ImageFormatRg32i: + return "rg32i"; + case ImageFormatRg16i: + return "rg16i"; + case ImageFormatRgba32ui: + return "rgba32ui"; + case ImageFormatRgba16ui: + return "rgba16ui"; + case ImageFormatR32ui: + return "r32ui"; + case ImageFormatRgba8ui: + return "rgba8ui"; + case ImageFormatRg32ui: + return "rg32ui"; + case ImageFormatRg16ui: + return "rg16ui"; + case ImageFormatR11fG11fB10f: + return "r11f_g11f_b10f"; + case ImageFormatR16f: + return "r16f"; + case ImageFormatRgb10A2: + return "rgb10_a2"; + case ImageFormatR8: + return "r8"; + case ImageFormatRg8: + return "rg8"; + case ImageFormatR16: + return "r16"; + case ImageFormatRg16: + return "rg16"; + case ImageFormatRgba16: + return "rgba16"; + case ImageFormatR16Snorm: + return "r16_snorm"; + case ImageFormatRg16Snorm: + return "rg16_snorm"; + case ImageFormatRgba16Snorm: + return "rgba16_snorm"; + case ImageFormatR8Snorm: + return "r8_snorm"; + case ImageFormatRg8Snorm: + return "rg8_snorm"; + case ImageFormatR8ui: + return "r8ui"; + case ImageFormatRg8ui: + return "rg8ui"; + case ImageFormatR16ui: + return "r16ui"; + case ImageFormatRgb10a2ui: + return "rgb10_a2ui"; + case ImageFormatR8i: + return "r8i"; + case ImageFormatRg8i: + return "rg8i"; + case ImageFormatR16i: + return "r16i"; + case ImageFormatR64i: + return "r64i"; + case ImageFormatR64ui: + return "r64ui"; + default: + case ImageFormatUnknown: + return nullptr; + } +} + +uint32_t CompilerGLSL::type_to_packed_base_size(const SPIRType &type, BufferPackingStandard) +{ + switch (type.basetype) + { + case SPIRType::Double: + case SPIRType::Int64: + case SPIRType::UInt64: + return 8; + case SPIRType::Float: + case SPIRType::Int: + case SPIRType::UInt: + return 4; + case SPIRType::Half: + case SPIRType::Short: + case SPIRType::UShort: + return 2; + case SPIRType::SByte: + case SPIRType::UByte: + return 1; + + default: + SPIRV_CROSS_THROW("Unrecognized type in type_to_packed_base_size."); + } +} + +uint32_t CompilerGLSL::type_to_packed_alignment(const SPIRType &type, const Bitset &flags, + BufferPackingStandard packing) +{ + // If using PhysicalStorageBufferEXT storage class, this is a pointer, + // and is 64-bit. + if (is_physical_pointer(type)) + { + if (!type.pointer) + SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers."); + + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + { + if (packing_is_vec4_padded(packing) && type_is_array_of_pointers(type)) + return 16; + else + return 8; + } + else + SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT."); + } + else if (is_array(type)) + { + uint32_t minimum_alignment = 1; + if (packing_is_vec4_padded(packing)) + minimum_alignment = 16; + + auto *tmp = &get<SPIRType>(type.parent_type); + while (!tmp->array.empty()) + tmp = &get<SPIRType>(tmp->parent_type); + + // Get the alignment of the base type, then maybe round up. + return max(minimum_alignment, type_to_packed_alignment(*tmp, flags, packing)); + } + + if (type.basetype == SPIRType::Struct) + { + // Rule 9. Structs alignments are maximum alignment of its members. + uint32_t alignment = 1; + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto member_flags = ir.meta[type.self].members[i].decoration_flags; + alignment = + max(alignment, type_to_packed_alignment(get<SPIRType>(type.member_types[i]), member_flags, packing)); + } + + // In std140, struct alignment is rounded up to 16. + if (packing_is_vec4_padded(packing)) + alignment = max<uint32_t>(alignment, 16u); + + return alignment; + } + else + { + const uint32_t base_alignment = type_to_packed_base_size(type, packing); + + // Alignment requirement for scalar block layout is always the alignment for the most basic component. + if (packing_is_scalar(packing)) + return base_alignment; + + // Vectors are *not* aligned in HLSL, but there's an extra rule where vectors cannot straddle + // a vec4, this is handled outside since that part knows our current offset. + if (type.columns == 1 && packing_is_hlsl(packing)) + return base_alignment; + + // From 7.6.2.2 in GL 4.5 core spec. + // Rule 1 + if (type.vecsize == 1 && type.columns == 1) + return base_alignment; + + // Rule 2 + if ((type.vecsize == 2 || type.vecsize == 4) && type.columns == 1) + return type.vecsize * base_alignment; + + // Rule 3 + if (type.vecsize == 3 && type.columns == 1) + return 4 * base_alignment; + + // Rule 4 implied. Alignment does not change in std430. + + // Rule 5. Column-major matrices are stored as arrays of + // vectors. + if (flags.get(DecorationColMajor) && type.columns > 1) + { + if (packing_is_vec4_padded(packing)) + return 4 * base_alignment; + else if (type.vecsize == 3) + return 4 * base_alignment; + else + return type.vecsize * base_alignment; + } + + // Rule 6 implied. + + // Rule 7. + if (flags.get(DecorationRowMajor) && type.vecsize > 1) + { + if (packing_is_vec4_padded(packing)) + return 4 * base_alignment; + else if (type.columns == 3) + return 4 * base_alignment; + else + return type.columns * base_alignment; + } + + // Rule 8 implied. + } + + SPIRV_CROSS_THROW("Did not find suitable rule for type. Bogus decorations?"); +} + +uint32_t CompilerGLSL::type_to_packed_array_stride(const SPIRType &type, const Bitset &flags, + BufferPackingStandard packing) +{ + // Array stride is equal to aligned size of the underlying type. + uint32_t parent = type.parent_type; + assert(parent); + + auto &tmp = get<SPIRType>(parent); + + uint32_t size = type_to_packed_size(tmp, flags, packing); + uint32_t alignment = type_to_packed_alignment(type, flags, packing); + return (size + alignment - 1) & ~(alignment - 1); +} + +uint32_t CompilerGLSL::type_to_packed_size(const SPIRType &type, const Bitset &flags, BufferPackingStandard packing) +{ + // If using PhysicalStorageBufferEXT storage class, this is a pointer, + // and is 64-bit. + if (is_physical_pointer(type)) + { + if (!type.pointer) + SPIRV_CROSS_THROW("Types in PhysicalStorageBufferEXT must be pointers."); + + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + return 8; + else + SPIRV_CROSS_THROW("AddressingModelPhysicalStorageBuffer64EXT must be used for PhysicalStorageBufferEXT."); + } + else if (is_array(type)) + { + uint32_t packed_size = to_array_size_literal(type) * type_to_packed_array_stride(type, flags, packing); + + // For arrays of vectors and matrices in HLSL, the last element has a size which depends on its vector size, + // so that it is possible to pack other vectors into the last element. + if (packing_is_hlsl(packing) && type.basetype != SPIRType::Struct) + packed_size -= (4 - type.vecsize) * (type.width / 8); + + return packed_size; + } + + uint32_t size = 0; + + if (type.basetype == SPIRType::Struct) + { + uint32_t pad_alignment = 1; + + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto member_flags = ir.meta[type.self].members[i].decoration_flags; + auto &member_type = get<SPIRType>(type.member_types[i]); + + uint32_t packed_alignment = type_to_packed_alignment(member_type, member_flags, packing); + uint32_t alignment = max(packed_alignment, pad_alignment); + + // The next member following a struct member is aligned to the base alignment of the struct that came before. + // GL 4.5 spec, 7.6.2.2. + if (member_type.basetype == SPIRType::Struct) + pad_alignment = packed_alignment; + else + pad_alignment = 1; + + size = (size + alignment - 1) & ~(alignment - 1); + size += type_to_packed_size(member_type, member_flags, packing); + } + } + else + { + const uint32_t base_alignment = type_to_packed_base_size(type, packing); + + if (packing_is_scalar(packing)) + { + size = type.vecsize * type.columns * base_alignment; + } + else + { + if (type.columns == 1) + size = type.vecsize * base_alignment; + + if (flags.get(DecorationColMajor) && type.columns > 1) + { + if (packing_is_vec4_padded(packing)) + size = type.columns * 4 * base_alignment; + else if (type.vecsize == 3) + size = type.columns * 4 * base_alignment; + else + size = type.columns * type.vecsize * base_alignment; + } + + if (flags.get(DecorationRowMajor) && type.vecsize > 1) + { + if (packing_is_vec4_padded(packing)) + size = type.vecsize * 4 * base_alignment; + else if (type.columns == 3) + size = type.vecsize * 4 * base_alignment; + else + size = type.vecsize * type.columns * base_alignment; + } + + // For matrices in HLSL, the last element has a size which depends on its vector size, + // so that it is possible to pack other vectors into the last element. + if (packing_is_hlsl(packing) && type.columns > 1) + size -= (4 - type.vecsize) * (type.width / 8); + } + } + + return size; +} + +bool CompilerGLSL::buffer_is_packing_standard(const SPIRType &type, BufferPackingStandard packing, + uint32_t *failed_validation_index, uint32_t start_offset, + uint32_t end_offset) +{ + // This is very tricky and error prone, but try to be exhaustive and correct here. + // SPIR-V doesn't directly say if we're using std430 or std140. + // SPIR-V communicates this using Offset and ArrayStride decorations (which is what really matters), + // so we have to try to infer whether or not the original GLSL source was std140 or std430 based on this information. + // We do not have to consider shared or packed since these layouts are not allowed in Vulkan SPIR-V (they are useless anyways, and custom offsets would do the same thing). + // + // It is almost certain that we're using std430, but it gets tricky with arrays in particular. + // We will assume std430, but infer std140 if we can prove the struct is not compliant with std430. + // + // The only two differences between std140 and std430 are related to padding alignment/array stride + // in arrays and structs. In std140 they take minimum vec4 alignment. + // std430 only removes the vec4 requirement. + + uint32_t offset = 0; + uint32_t pad_alignment = 1; + + bool is_top_level_block = + has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock); + + for (uint32_t i = 0; i < type.member_types.size(); i++) + { + auto &memb_type = get<SPIRType>(type.member_types[i]); + + auto *type_meta = ir.find_meta(type.self); + auto member_flags = type_meta ? type_meta->members[i].decoration_flags : Bitset{}; + + // Verify alignment rules. + uint32_t packed_alignment = type_to_packed_alignment(memb_type, member_flags, packing); + + // This is a rather dirty workaround to deal with some cases of OpSpecConstantOp used as array size, e.g: + // layout(constant_id = 0) const int s = 10; + // const int S = s + 5; // SpecConstantOp + // buffer Foo { int data[S]; }; // <-- Very hard for us to deduce a fixed value here, + // we would need full implementation of compile-time constant folding. :( + // If we are the last member of a struct, there might be cases where the actual size of that member is irrelevant + // for our analysis (e.g. unsized arrays). + // This lets us simply ignore that there are spec constant op sized arrays in our buffers. + // Querying size of this member will fail, so just don't call it unless we have to. + // + // This is likely "best effort" we can support without going into unacceptably complicated workarounds. + bool member_can_be_unsized = + is_top_level_block && size_t(i + 1) == type.member_types.size() && !memb_type.array.empty(); + + uint32_t packed_size = 0; + if (!member_can_be_unsized || packing_is_hlsl(packing)) + packed_size = type_to_packed_size(memb_type, member_flags, packing); + + // We only need to care about this if we have non-array types which can straddle the vec4 boundary. + uint32_t actual_offset = type_struct_member_offset(type, i); + + if (packing_is_hlsl(packing)) + { + // If a member straddles across a vec4 boundary, alignment is actually vec4. + uint32_t target_offset; + + // If we intend to use explicit packing, we must check for improper straddle with that offset. + // In implicit packing, we must check with implicit offset, since the explicit offset + // might have already accounted for the straddle, and we'd miss the alignment promotion to vec4. + // This is important when packing sub-structs that don't support packoffset(). + if (packing_has_flexible_offset(packing)) + target_offset = actual_offset; + else + target_offset = offset; + + uint32_t begin_word = target_offset / 16; + uint32_t end_word = (target_offset + packed_size - 1) / 16; + + if (begin_word != end_word) + packed_alignment = max<uint32_t>(packed_alignment, 16u); + } + + // Field is not in the specified range anymore and we can ignore any further fields. + if (actual_offset >= end_offset) + break; + + uint32_t alignment = max(packed_alignment, pad_alignment); + offset = (offset + alignment - 1) & ~(alignment - 1); + + // The next member following a struct member is aligned to the base alignment of the struct that came before. + // GL 4.5 spec, 7.6.2.2. + if (memb_type.basetype == SPIRType::Struct && !memb_type.pointer) + pad_alignment = packed_alignment; + else + pad_alignment = 1; + + // Only care about packing if we are in the given range + if (actual_offset >= start_offset) + { + // We only care about offsets in std140, std430, etc ... + // For EnhancedLayout variants, we have the flexibility to choose our own offsets. + if (!packing_has_flexible_offset(packing)) + { + if (actual_offset != offset) // This cannot be the packing we're looking for. + { + if (failed_validation_index) + *failed_validation_index = i; + return false; + } + } + else if ((actual_offset & (alignment - 1)) != 0) + { + // We still need to verify that alignment rules are observed, even if we have explicit offset. + if (failed_validation_index) + *failed_validation_index = i; + return false; + } + + // Verify array stride rules. + if (is_array(memb_type) && + type_to_packed_array_stride(memb_type, member_flags, packing) != + type_struct_member_array_stride(type, i)) + { + if (failed_validation_index) + *failed_validation_index = i; + return false; + } + + // Verify that sub-structs also follow packing rules. + // We cannot use enhanced layouts on substructs, so they better be up to spec. + auto substruct_packing = packing_to_substruct_packing(packing); + + if (!memb_type.pointer && !memb_type.member_types.empty() && + !buffer_is_packing_standard(memb_type, substruct_packing)) + { + if (failed_validation_index) + *failed_validation_index = i; + return false; + } + } + + // Bump size. + offset = actual_offset + packed_size; + } + + return true; +} + +bool CompilerGLSL::can_use_io_location(StorageClass storage, bool block) +{ + // Location specifiers are must have in SPIR-V, but they aren't really supported in earlier versions of GLSL. + // Be very explicit here about how to solve the issue. + if ((get_execution_model() != ExecutionModelVertex && storage == StorageClassInput) || + (get_execution_model() != ExecutionModelFragment && storage == StorageClassOutput)) + { + uint32_t minimum_desktop_version = block ? 440 : 410; + // ARB_enhanced_layouts vs ARB_separate_shader_objects ... + + if (!options.es && options.version < minimum_desktop_version && !options.separate_shader_objects) + return false; + else if (options.es && options.version < 310) + return false; + } + + if ((get_execution_model() == ExecutionModelVertex && storage == StorageClassInput) || + (get_execution_model() == ExecutionModelFragment && storage == StorageClassOutput)) + { + if (options.es && options.version < 300) + return false; + else if (!options.es && options.version < 330) + return false; + } + + if (storage == StorageClassUniform || storage == StorageClassUniformConstant || storage == StorageClassPushConstant) + { + if (options.es && options.version < 310) + return false; + else if (!options.es && options.version < 430) + return false; + } + + return true; +} + +string CompilerGLSL::layout_for_variable(const SPIRVariable &var) +{ + // FIXME: Come up with a better solution for when to disable layouts. + // Having layouts depend on extensions as well as which types + // of layouts are used. For now, the simple solution is to just disable + // layouts for legacy versions. + if (is_legacy()) + return ""; + + if (subpass_input_is_framebuffer_fetch(var.self)) + return ""; + + SmallVector<string> attr; + + auto &type = get<SPIRType>(var.basetype); + auto &flags = get_decoration_bitset(var.self); + auto &typeflags = get_decoration_bitset(type.self); + + if (flags.get(DecorationPassthroughNV)) + attr.push_back("passthrough"); + + if (options.vulkan_semantics && var.storage == StorageClassPushConstant) + attr.push_back("push_constant"); + else if (var.storage == StorageClassShaderRecordBufferKHR) + attr.push_back(ray_tracing_is_khr ? "shaderRecordEXT" : "shaderRecordNV"); + + if (flags.get(DecorationRowMajor)) + attr.push_back("row_major"); + if (flags.get(DecorationColMajor)) + attr.push_back("column_major"); + + if (options.vulkan_semantics) + { + if (flags.get(DecorationInputAttachmentIndex)) + attr.push_back(join("input_attachment_index = ", get_decoration(var.self, DecorationInputAttachmentIndex))); + } + + bool is_block = has_decoration(type.self, DecorationBlock); + if (flags.get(DecorationLocation) && can_use_io_location(var.storage, is_block)) + { + Bitset combined_decoration; + for (uint32_t i = 0; i < ir.meta[type.self].members.size(); i++) + combined_decoration.merge_or(combined_decoration_for_member(type, i)); + + // If our members have location decorations, we don't need to + // emit location decorations at the top as well (looks weird). + if (!combined_decoration.get(DecorationLocation)) + attr.push_back(join("location = ", get_decoration(var.self, DecorationLocation))); + } + + if (get_execution_model() == ExecutionModelFragment && var.storage == StorageClassOutput && + location_is_non_coherent_framebuffer_fetch(get_decoration(var.self, DecorationLocation))) + { + attr.push_back("noncoherent"); + } + + // Transform feedback + bool uses_enhanced_layouts = false; + if (is_block && var.storage == StorageClassOutput) + { + // For blocks, there is a restriction where xfb_stride/xfb_buffer must only be declared on the block itself, + // since all members must match the same xfb_buffer. The only thing we will declare for members of the block + // is the xfb_offset. + uint32_t member_count = uint32_t(type.member_types.size()); + bool have_xfb_buffer_stride = false; + bool have_any_xfb_offset = false; + bool have_geom_stream = false; + uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; + + if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride)) + { + have_xfb_buffer_stride = true; + xfb_buffer = get_decoration(var.self, DecorationXfbBuffer); + xfb_stride = get_decoration(var.self, DecorationXfbStride); + } + + if (flags.get(DecorationStream)) + { + have_geom_stream = true; + geom_stream = get_decoration(var.self, DecorationStream); + } + + // Verify that none of the members violate our assumption. + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationStream)) + { + uint32_t member_geom_stream = get_member_decoration(type.self, i, DecorationStream); + if (have_geom_stream && member_geom_stream != geom_stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = member_geom_stream; + } + + // Only members with an Offset decoration participate in XFB. + if (!has_member_decoration(type.self, i, DecorationOffset)) + continue; + have_any_xfb_offset = true; + + if (has_member_decoration(type.self, i, DecorationXfbBuffer)) + { + uint32_t buffer_index = get_member_decoration(type.self, i, DecorationXfbBuffer); + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + } + + if (has_member_decoration(type.self, i, DecorationXfbStride)) + { + uint32_t stride = get_member_decoration(type.self, i, DecorationXfbStride); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbStride mismatch."); + have_xfb_buffer_stride = true; + xfb_stride = stride; + } + } + + if (have_xfb_buffer_stride && have_any_xfb_offset) + { + attr.push_back(join("xfb_buffer = ", xfb_buffer)); + attr.push_back(join("xfb_stride = ", xfb_stride)); + uses_enhanced_layouts = true; + } + + if (have_geom_stream) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); + } + } + else if (var.storage == StorageClassOutput) + { + if (flags.get(DecorationXfbBuffer) && flags.get(DecorationXfbStride) && flags.get(DecorationOffset)) + { + // XFB for standalone variables, we can emit all decorations. + attr.push_back(join("xfb_buffer = ", get_decoration(var.self, DecorationXfbBuffer))); + attr.push_back(join("xfb_stride = ", get_decoration(var.self, DecorationXfbStride))); + attr.push_back(join("xfb_offset = ", get_decoration(var.self, DecorationOffset))); + uses_enhanced_layouts = true; + } + + if (flags.get(DecorationStream)) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", get_decoration(var.self, DecorationStream))); + } + } + + // Can only declare Component if we can declare location. + if (flags.get(DecorationComponent) && can_use_io_location(var.storage, is_block)) + { + uses_enhanced_layouts = true; + attr.push_back(join("component = ", get_decoration(var.self, DecorationComponent))); + } + + if (uses_enhanced_layouts) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in targets below GLSL 1.40."); + if (!options.es && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + } + else if (options.es) + SPIRV_CROSS_THROW("GL_ARB_enhanced_layouts is not supported in ESSL."); + } + + if (flags.get(DecorationIndex)) + attr.push_back(join("index = ", get_decoration(var.self, DecorationIndex))); + + // Do not emit set = decoration in regular GLSL output, but + // we need to preserve it in Vulkan GLSL mode. + if (var.storage != StorageClassPushConstant && var.storage != StorageClassShaderRecordBufferKHR) + { + if (flags.get(DecorationDescriptorSet) && options.vulkan_semantics) + attr.push_back(join("set = ", get_decoration(var.self, DecorationDescriptorSet))); + } + + bool push_constant_block = options.vulkan_semantics && var.storage == StorageClassPushConstant; + bool ssbo_block = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR || + (var.storage == StorageClassUniform && typeflags.get(DecorationBufferBlock)); + bool emulated_ubo = var.storage == StorageClassPushConstant && options.emit_push_constant_as_uniform_buffer; + bool ubo_block = var.storage == StorageClassUniform && typeflags.get(DecorationBlock); + + // GL 3.0/GLSL 1.30 is not considered legacy, but it doesn't have UBOs ... + bool can_use_buffer_blocks = (options.es && options.version >= 300) || (!options.es && options.version >= 140); + + // pretend no UBOs when options say so + if (ubo_block && options.emit_uniform_buffer_as_plain_uniforms) + can_use_buffer_blocks = false; + + bool can_use_binding; + if (options.es) + can_use_binding = options.version >= 310; + else + can_use_binding = options.enable_420pack_extension || (options.version >= 420); + + // Make sure we don't emit binding layout for a classic uniform on GLSL 1.30. + if (!can_use_buffer_blocks && var.storage == StorageClassUniform) + can_use_binding = false; + + if (var.storage == StorageClassShaderRecordBufferKHR) + can_use_binding = false; + + if (can_use_binding && flags.get(DecorationBinding)) + attr.push_back(join("binding = ", get_decoration(var.self, DecorationBinding))); + + if (var.storage != StorageClassOutput && flags.get(DecorationOffset)) + attr.push_back(join("offset = ", get_decoration(var.self, DecorationOffset))); + + // Instead of adding explicit offsets for every element here, just assume we're using std140 or std430. + // If SPIR-V does not comply with either layout, we cannot really work around it. + if (can_use_buffer_blocks && (ubo_block || emulated_ubo)) + { + attr.push_back(buffer_to_packing_standard(type, false, true)); + } + else if (can_use_buffer_blocks && (push_constant_block || ssbo_block)) + { + attr.push_back(buffer_to_packing_standard(type, true, true)); + } + + // For images, the type itself adds a layout qualifer. + // Only emit the format for storage images. + if (type.basetype == SPIRType::Image && type.image.sampled == 2) + { + const char *fmt = format_to_glsl(type.image.format); + if (fmt) + attr.push_back(fmt); + } + + if (attr.empty()) + return ""; + + string res = "layout("; + res += merge(attr); + res += ") "; + return res; +} + +string CompilerGLSL::buffer_to_packing_standard(const SPIRType &type, + bool support_std430_without_scalar_layout, + bool support_enhanced_layouts) +{ + if (support_std430_without_scalar_layout && buffer_is_packing_standard(type, BufferPackingStd430)) + return "std430"; + else if (buffer_is_packing_standard(type, BufferPackingStd140)) + return "std140"; + else if (options.vulkan_semantics && buffer_is_packing_standard(type, BufferPackingScalar)) + { + require_extension_internal("GL_EXT_scalar_block_layout"); + return "scalar"; + } + else if (support_std430_without_scalar_layout && + support_enhanced_layouts && + buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) + { + if (options.es && !options.vulkan_semantics) + SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " + "not support GL_ARB_enhanced_layouts."); + if (!options.es && !options.vulkan_semantics && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + return "std430"; + } + else if (support_enhanced_layouts && + buffer_is_packing_standard(type, BufferPackingStd140EnhancedLayout)) + { + // Fallback time. We might be able to use the ARB_enhanced_layouts to deal with this difference, + // however, we can only use layout(offset) on the block itself, not any substructs, so the substructs better be the appropriate layout. + // Enhanced layouts seem to always work in Vulkan GLSL, so no need for extensions there. + if (options.es && !options.vulkan_semantics) + SPIRV_CROSS_THROW("Push constant block cannot be expressed as neither std430 nor std140. ES-targets do " + "not support GL_ARB_enhanced_layouts."); + if (!options.es && !options.vulkan_semantics && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + return "std140"; + } + else if (options.vulkan_semantics && + support_enhanced_layouts && + buffer_is_packing_standard(type, BufferPackingScalarEnhancedLayout)) + { + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + require_extension_internal("GL_EXT_scalar_block_layout"); + return "scalar"; + } + else if (!support_std430_without_scalar_layout && options.vulkan_semantics && + buffer_is_packing_standard(type, BufferPackingStd430)) + { + // UBOs can support std430 with GL_EXT_scalar_block_layout. + require_extension_internal("GL_EXT_scalar_block_layout"); + return "std430"; + } + else if (!support_std430_without_scalar_layout && options.vulkan_semantics && + support_enhanced_layouts && + buffer_is_packing_standard(type, BufferPackingStd430EnhancedLayout)) + { + // UBOs can support std430 with GL_EXT_scalar_block_layout. + set_extended_decoration(type.self, SPIRVCrossDecorationExplicitOffset); + require_extension_internal("GL_EXT_scalar_block_layout"); + return "std430"; + } + else + { + SPIRV_CROSS_THROW("Buffer block cannot be expressed as any of std430, std140, scalar, even with enhanced " + "layouts. You can try flattening this block to support a more flexible layout."); + } +} + +void CompilerGLSL::emit_push_constant_block(const SPIRVariable &var) +{ + if (flattened_buffer_blocks.count(var.self)) + emit_buffer_block_flattened(var); + else if (options.vulkan_semantics) + emit_push_constant_block_vulkan(var); + else if (options.emit_push_constant_as_uniform_buffer) + emit_buffer_block_native(var); + else + emit_push_constant_block_glsl(var); +} + +void CompilerGLSL::emit_push_constant_block_vulkan(const SPIRVariable &var) +{ + emit_buffer_block(var); +} + +void CompilerGLSL::emit_push_constant_block_glsl(const SPIRVariable &var) +{ + // OpenGL has no concept of push constant blocks, implement it as a uniform struct. + auto &type = get<SPIRType>(var.basetype); + + unset_decoration(var.self, DecorationBinding); + unset_decoration(var.self, DecorationDescriptorSet); + +#if 0 + if (flags & ((1ull << DecorationBinding) | (1ull << DecorationDescriptorSet))) + SPIRV_CROSS_THROW("Push constant blocks cannot be compiled to GLSL with Binding or Set syntax. " + "Remap to location with reflection API first or disable these decorations."); +#endif + + // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. + // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. + bool block_flag = has_decoration(type.self, DecorationBlock); + unset_decoration(type.self, DecorationBlock); + + emit_struct(type); + + if (block_flag) + set_decoration(type.self, DecorationBlock); + + emit_uniform(var); + statement(""); +} + +void CompilerGLSL::emit_buffer_block(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + bool ubo_block = var.storage == StorageClassUniform && has_decoration(type.self, DecorationBlock); + + if (flattened_buffer_blocks.count(var.self)) + emit_buffer_block_flattened(var); + else if (is_legacy() || (!options.es && options.version == 130) || + (ubo_block && options.emit_uniform_buffer_as_plain_uniforms)) + emit_buffer_block_legacy(var); + else + emit_buffer_block_native(var); +} + +void CompilerGLSL::emit_buffer_block_legacy(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + bool ssbo = var.storage == StorageClassStorageBuffer || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + if (ssbo) + SPIRV_CROSS_THROW("SSBOs not supported in legacy targets."); + + // We're emitting the push constant block as a regular struct, so disable the block qualifier temporarily. + // Otherwise, we will end up emitting layout() qualifiers on naked structs which is not allowed. + auto &block_flags = ir.meta[type.self].decoration.decoration_flags; + bool block_flag = block_flags.get(DecorationBlock); + block_flags.clear(DecorationBlock); + emit_struct(type); + if (block_flag) + block_flags.set(DecorationBlock); + emit_uniform(var); + statement(""); +} + +void CompilerGLSL::emit_buffer_reference_block(uint32_t type_id, bool forward_declaration) +{ + auto &type = get<SPIRType>(type_id); + string buffer_name; + + if (forward_declaration && is_physical_pointer_to_buffer_block(type)) + { + // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ... + // Allow aliased name since we might be declaring the block twice. Once with buffer reference (forward declared) and one proper declaration. + // The names must match up. + buffer_name = to_name(type.self, false); + + // Shaders never use the block by interface name, so we don't + // have to track this other than updating name caches. + // If we have a collision for any reason, just fallback immediately. + if (ir.meta[type.self].decoration.alias.empty() || + block_ssbo_names.find(buffer_name) != end(block_ssbo_names) || + resource_names.find(buffer_name) != end(resource_names)) + { + buffer_name = join("_", type.self); + } + + // Make sure we get something unique for both global name scope and block name scope. + // See GLSL 4.5 spec: section 4.3.9 for details. + add_variable(block_ssbo_names, resource_names, buffer_name); + + // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. + // This cannot conflict with anything else, so we're safe now. + // We cannot reuse this fallback name in neither global scope (blocked by block_names) nor block name scope. + if (buffer_name.empty()) + buffer_name = join("_", type.self); + + block_names.insert(buffer_name); + block_ssbo_names.insert(buffer_name); + + // Ensure we emit the correct name when emitting non-forward pointer type. + ir.meta[type.self].decoration.alias = buffer_name; + } + else + { + buffer_name = type_to_glsl(type); + } + + if (!forward_declaration) + { + auto itr = physical_storage_type_to_alignment.find(type_id); + uint32_t alignment = 0; + if (itr != physical_storage_type_to_alignment.end()) + alignment = itr->second.alignment; + + if (is_physical_pointer_to_buffer_block(type)) + { + SmallVector<std::string> attributes; + attributes.push_back("buffer_reference"); + if (alignment) + attributes.push_back(join("buffer_reference_align = ", alignment)); + attributes.push_back(buffer_to_packing_standard(type, true, true)); + + auto flags = ir.get_buffer_block_type_flags(type); + string decorations; + if (flags.get(DecorationRestrict)) + decorations += " restrict"; + if (flags.get(DecorationCoherent)) + decorations += " coherent"; + if (flags.get(DecorationNonReadable)) + decorations += " writeonly"; + if (flags.get(DecorationNonWritable)) + decorations += " readonly"; + + statement("layout(", merge(attributes), ")", decorations, " buffer ", buffer_name); + } + else + { + string packing_standard; + if (type.basetype == SPIRType::Struct) + { + // The non-block type is embedded in a block, so we cannot use enhanced layouts :( + packing_standard = buffer_to_packing_standard(type, true, false) + ", "; + } + else if (is_array(get_pointee_type(type))) + { + SPIRType wrap_type{OpTypeStruct}; + wrap_type.self = ir.increase_bound_by(1); + wrap_type.member_types.push_back(get_pointee_type_id(type_id)); + ir.set_member_decoration(wrap_type.self, 0, DecorationOffset, 0); + packing_standard = buffer_to_packing_standard(wrap_type, true, false) + ", "; + } + + if (alignment) + statement("layout(", packing_standard, "buffer_reference, buffer_reference_align = ", alignment, ") buffer ", buffer_name); + else + statement("layout(", packing_standard, "buffer_reference) buffer ", buffer_name); + } + + begin_scope(); + + if (is_physical_pointer_to_buffer_block(type)) + { + type.member_name_cache.clear(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + emit_struct_member(type, member, i); + i++; + } + } + else + { + auto &pointee_type = get_pointee_type(type); + statement(type_to_glsl(pointee_type), " value", type_to_array_glsl(pointee_type, 0), ";"); + } + + end_scope_decl(); + statement(""); + } + else + { + statement("layout(buffer_reference) buffer ", buffer_name, ";"); + } +} + +void CompilerGLSL::emit_buffer_block_native(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + + Bitset flags = ir.get_buffer_block_flags(var); + bool ssbo = var.storage == StorageClassStorageBuffer || var.storage == StorageClassShaderRecordBufferKHR || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + bool is_restrict = ssbo && flags.get(DecorationRestrict); + bool is_writeonly = ssbo && flags.get(DecorationNonReadable); + bool is_readonly = ssbo && flags.get(DecorationNonWritable); + bool is_coherent = ssbo && flags.get(DecorationCoherent); + + // Block names should never alias, but from HLSL input they kind of can because block types are reused for UAVs ... + auto buffer_name = to_name(type.self, false); + + auto &block_namespace = ssbo ? block_ssbo_names : block_ubo_names; + + // Shaders never use the block by interface name, so we don't + // have to track this other than updating name caches. + // If we have a collision for any reason, just fallback immediately. + if (ir.meta[type.self].decoration.alias.empty() || block_namespace.find(buffer_name) != end(block_namespace) || + resource_names.find(buffer_name) != end(resource_names)) + { + buffer_name = get_block_fallback_name(var.self); + } + + // Make sure we get something unique for both global name scope and block name scope. + // See GLSL 4.5 spec: section 4.3.9 for details. + add_variable(block_namespace, resource_names, buffer_name); + + // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. + // This cannot conflict with anything else, so we're safe now. + // We cannot reuse this fallback name in neither global scope (blocked by block_names) nor block name scope. + if (buffer_name.empty()) + buffer_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self); + + block_names.insert(buffer_name); + block_namespace.insert(buffer_name); + + // Save for post-reflection later. + declared_block_names[var.self] = buffer_name; + + statement(layout_for_variable(var), is_coherent ? "coherent " : "", is_restrict ? "restrict " : "", + is_writeonly ? "writeonly " : "", is_readonly ? "readonly " : "", ssbo ? "buffer " : "uniform ", + buffer_name); + + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + emit_struct_member(type, member, i); + i++; + } + + // Don't declare empty blocks in GLSL, this is not allowed. + if (type_is_empty(type) && !backend.supports_empty_struct) + statement("int empty_struct_member;"); + + // var.self can be used as a backup name for the block name, + // so we need to make sure we don't disturb the name here on a recompile. + // It will need to be reset if we have to recompile. + preserve_alias_on_reset(var.self); + add_resource_name(var.self); + end_scope_decl(to_name(var.self) + type_to_array_glsl(type, var.self)); + statement(""); +} + +void CompilerGLSL::emit_buffer_block_flattened(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + + // Block names should never alias. + auto buffer_name = to_name(type.self, false); + size_t buffer_size = (get_declared_struct_size(type) + 15) / 16; + + SPIRType::BaseType basic_type; + if (get_common_basic_type(type, basic_type)) + { + SPIRType tmp { OpTypeVector }; + tmp.basetype = basic_type; + tmp.vecsize = 4; + if (basic_type != SPIRType::Float && basic_type != SPIRType::Int && basic_type != SPIRType::UInt) + SPIRV_CROSS_THROW("Basic types in a flattened UBO must be float, int or uint."); + + auto flags = ir.get_buffer_block_flags(var); + statement("uniform ", flags_to_qualifiers_glsl(tmp, flags), type_to_glsl(tmp), " ", buffer_name, "[", + buffer_size, "];"); + } + else + SPIRV_CROSS_THROW("All basic types in a flattened block must be the same."); +} + +const char *CompilerGLSL::to_storage_qualifiers_glsl(const SPIRVariable &var) +{ + auto &execution = get_entry_point(); + + if (subpass_input_is_framebuffer_fetch(var.self)) + return ""; + + if (var.storage == StorageClassInput || var.storage == StorageClassOutput) + { + if (is_legacy() && execution.model == ExecutionModelVertex) + return var.storage == StorageClassInput ? "attribute " : "varying "; + else if (is_legacy() && execution.model == ExecutionModelFragment) + return "varying "; // Fragment outputs are renamed so they never hit this case. + else if (execution.model == ExecutionModelFragment && var.storage == StorageClassOutput) + { + uint32_t loc = get_decoration(var.self, DecorationLocation); + bool is_inout = location_is_framebuffer_fetch(loc); + if (is_inout) + return "inout "; + else + return "out "; + } + else + return var.storage == StorageClassInput ? "in " : "out "; + } + else if (var.storage == StorageClassUniformConstant || var.storage == StorageClassUniform || + var.storage == StorageClassPushConstant || var.storage == StorageClassAtomicCounter) + { + return "uniform "; + } + else if (var.storage == StorageClassRayPayloadKHR) + { + return ray_tracing_is_khr ? "rayPayloadEXT " : "rayPayloadNV "; + } + else if (var.storage == StorageClassIncomingRayPayloadKHR) + { + return ray_tracing_is_khr ? "rayPayloadInEXT " : "rayPayloadInNV "; + } + else if (var.storage == StorageClassHitAttributeKHR) + { + return ray_tracing_is_khr ? "hitAttributeEXT " : "hitAttributeNV "; + } + else if (var.storage == StorageClassCallableDataKHR) + { + return ray_tracing_is_khr ? "callableDataEXT " : "callableDataNV "; + } + else if (var.storage == StorageClassIncomingCallableDataKHR) + { + return ray_tracing_is_khr ? "callableDataInEXT " : "callableDataInNV "; + } + + return ""; +} + +void CompilerGLSL::emit_flattened_io_block_member(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector<uint32_t> &indices) +{ + uint32_t member_type_id = type.self; + const SPIRType *member_type = &type; + const SPIRType *parent_type = nullptr; + auto flattened_name = basename; + for (auto &index : indices) + { + flattened_name += "_"; + flattened_name += to_member_name(*member_type, index); + parent_type = member_type; + member_type_id = member_type->member_types[index]; + member_type = &get<SPIRType>(member_type_id); + } + + assert(member_type->basetype != SPIRType::Struct); + + // We're overriding struct member names, so ensure we do so on the primary type. + if (parent_type->type_alias) + parent_type = &get<SPIRType>(parent_type->type_alias); + + // Sanitize underscores because joining the two identifiers might create more than 1 underscore in a row, + // which is not allowed. + ParsedIR::sanitize_underscores(flattened_name); + + uint32_t last_index = indices.back(); + + // Pass in the varying qualifier here so it will appear in the correct declaration order. + // Replace member name while emitting it so it encodes both struct name and member name. + auto backup_name = get_member_name(parent_type->self, last_index); + auto member_name = to_member_name(*parent_type, last_index); + set_member_name(parent_type->self, last_index, flattened_name); + emit_struct_member(*parent_type, member_type_id, last_index, qual); + // Restore member name. + set_member_name(parent_type->self, last_index, member_name); +} + +void CompilerGLSL::emit_flattened_io_block_struct(const std::string &basename, const SPIRType &type, const char *qual, + const SmallVector<uint32_t> &indices) +{ + auto sub_indices = indices; + sub_indices.push_back(0); + + const SPIRType *member_type = &type; + for (auto &index : indices) + member_type = &get<SPIRType>(member_type->member_types[index]); + + assert(member_type->basetype == SPIRType::Struct); + + if (!member_type->array.empty()) + SPIRV_CROSS_THROW("Cannot flatten array of structs in I/O blocks."); + + for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) + { + sub_indices.back() = i; + if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct) + emit_flattened_io_block_struct(basename, type, qual, sub_indices); + else + emit_flattened_io_block_member(basename, type, qual, sub_indices); + } +} + +void CompilerGLSL::emit_flattened_io_block(const SPIRVariable &var, const char *qual) +{ + auto &var_type = get<SPIRType>(var.basetype); + if (!var_type.array.empty()) + SPIRV_CROSS_THROW("Array of varying structs cannot be flattened to legacy-compatible varyings."); + + // Emit flattened types based on the type alias. Normally, we are never supposed to emit + // struct declarations for aliased types. + auto &type = var_type.type_alias ? get<SPIRType>(var_type.type_alias) : var_type; + + auto old_flags = ir.meta[type.self].decoration.decoration_flags; + // Emit the members as if they are part of a block to get all qualifiers. + ir.meta[type.self].decoration.decoration_flags.set(DecorationBlock); + + type.member_name_cache.clear(); + + SmallVector<uint32_t> member_indices; + member_indices.push_back(0); + auto basename = to_name(var.self); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + auto &membertype = get<SPIRType>(member); + + member_indices.back() = i; + if (membertype.basetype == SPIRType::Struct) + emit_flattened_io_block_struct(basename, type, qual, member_indices); + else + emit_flattened_io_block_member(basename, type, qual, member_indices); + i++; + } + + ir.meta[type.self].decoration.decoration_flags = old_flags; + + // Treat this variable as fully flattened from now on. + flattened_structs[var.self] = true; +} + +void CompilerGLSL::emit_interface_block(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + + if (var.storage == StorageClassInput && type.basetype == SPIRType::Double && + !options.es && options.version < 410) + { + require_extension_internal("GL_ARB_vertex_attrib_64bit"); + } + + // Either make it plain in/out or in/out blocks depending on what shader is doing ... + bool block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock); + const char *qual = to_storage_qualifiers_glsl(var); + + if (block) + { + // ESSL earlier than 310 and GLSL earlier than 150 did not support + // I/O variables which are struct types. + // To support this, flatten the struct into separate varyings instead. + if (options.force_flattened_io_blocks || (options.es && options.version < 310) || + (!options.es && options.version < 150)) + { + // I/O blocks on ES require version 310 with Android Extension Pack extensions, or core version 320. + // On desktop, I/O blocks were introduced with geometry shaders in GL 3.2 (GLSL 150). + emit_flattened_io_block(var, qual); + } + else + { + if (options.es && options.version < 320) + { + // Geometry and tessellation extensions imply this extension. + if (!has_extension("GL_EXT_geometry_shader") && !has_extension("GL_EXT_tessellation_shader")) + require_extension_internal("GL_EXT_shader_io_blocks"); + } + + // Workaround to make sure we can emit "patch in/out" correctly. + fixup_io_block_patch_primitive_qualifiers(var); + + // Block names should never alias. + auto block_name = to_name(type.self, false); + + // The namespace for I/O blocks is separate from other variables in GLSL. + auto &block_namespace = type.storage == StorageClassInput ? block_input_names : block_output_names; + + // Shaders never use the block by interface name, so we don't + // have to track this other than updating name caches. + if (block_name.empty() || block_namespace.find(block_name) != end(block_namespace)) + block_name = get_fallback_name(type.self); + else + block_namespace.insert(block_name); + + // If for some reason buffer_name is an illegal name, make a final fallback to a workaround name. + // This cannot conflict with anything else, so we're safe now. + if (block_name.empty()) + block_name = join("_", get<SPIRType>(var.basetype).self, "_", var.self); + + // Instance names cannot alias block names. + resource_names.insert(block_name); + + const char *block_qualifier; + if (has_decoration(var.self, DecorationPatch)) + block_qualifier = "patch "; + else if (has_decoration(var.self, DecorationPerPrimitiveEXT)) + block_qualifier = "perprimitiveEXT "; + else + block_qualifier = ""; + + statement(layout_for_variable(var), block_qualifier, qual, block_name); + begin_scope(); + + type.member_name_cache.clear(); + + uint32_t i = 0; + for (auto &member : type.member_types) + { + add_member_name(type, i); + emit_struct_member(type, member, i); + i++; + } + + add_resource_name(var.self); + end_scope_decl(join(to_name(var.self), type_to_array_glsl(type, var.self))); + statement(""); + } + } + else + { + // ESSL earlier than 310 and GLSL earlier than 150 did not support + // I/O variables which are struct types. + // To support this, flatten the struct into separate varyings instead. + if (type.basetype == SPIRType::Struct && + (options.force_flattened_io_blocks || (options.es && options.version < 310) || + (!options.es && options.version < 150))) + { + emit_flattened_io_block(var, qual); + } + else + { + add_resource_name(var.self); + + // Legacy GLSL did not support int attributes, we automatically + // declare them as float and cast them on load/store + SPIRType newtype = type; + if (is_legacy() && var.storage == StorageClassInput && type.basetype == SPIRType::Int) + newtype.basetype = SPIRType::Float; + + // Tessellation control and evaluation shaders must have either + // gl_MaxPatchVertices or unsized arrays for input arrays. + // Opt for unsized as it's the more "correct" variant to use. + if (type.storage == StorageClassInput && !type.array.empty() && + !has_decoration(var.self, DecorationPatch) && + (get_entry_point().model == ExecutionModelTessellationControl || + get_entry_point().model == ExecutionModelTessellationEvaluation)) + { + newtype.array.back() = 0; + newtype.array_size_literal.back() = true; + } + + statement(layout_for_variable(var), to_qualifiers_glsl(var.self), + variable_decl(newtype, to_name(var.self), var.self), ";"); + } + } +} + +void CompilerGLSL::emit_uniform(const SPIRVariable &var) +{ + auto &type = get<SPIRType>(var.basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 2 && type.image.dim != DimSubpassData) + { + if (!options.es && options.version < 420) + require_extension_internal("GL_ARB_shader_image_load_store"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for shader image load store."); + } + + add_resource_name(var.self); + statement(layout_for_variable(var), variable_decl(var), ";"); +} + +string CompilerGLSL::constant_value_macro_name(uint32_t id) +{ + return join("SPIRV_CROSS_CONSTANT_ID_", id); +} + +void CompilerGLSL::emit_specialization_constant_op(const SPIRConstantOp &constant) +{ + auto &type = get<SPIRType>(constant.basetype); + // This will break. It is bogus and should not be legal. + if (type_is_top_level_block(type)) + return; + add_resource_name(constant.self); + auto name = to_name(constant.self); + statement("const ", variable_decl(type, name), " = ", constant_op_expression(constant), ";"); +} + +int CompilerGLSL::get_constant_mapping_to_workgroup_component(const SPIRConstant &c) const +{ + auto &entry_point = get_entry_point(); + int index = -1; + + // Need to redirect specialization constants which are used as WorkGroupSize to the builtin, + // since the spec constant declarations are never explicitly declared. + if (entry_point.workgroup_size.constant == 0 && entry_point.flags.get(ExecutionModeLocalSizeId)) + { + if (c.self == entry_point.workgroup_size.id_x) + index = 0; + else if (c.self == entry_point.workgroup_size.id_y) + index = 1; + else if (c.self == entry_point.workgroup_size.id_z) + index = 2; + } + + return index; +} + +void CompilerGLSL::emit_constant(const SPIRConstant &constant) +{ + auto &type = get<SPIRType>(constant.constant_type); + + // This will break. It is bogus and should not be legal. + if (type_is_top_level_block(type)) + return; + + SpecializationConstant wg_x, wg_y, wg_z; + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + // This specialization constant is implicitly declared by emitting layout() in; + if (constant.self == workgroup_size_id) + return; + + // These specialization constants are implicitly declared by emitting layout() in; + // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration + // later can use macro overrides for work group size. + bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id || + ConstantID(constant.self) == wg_z.id; + + if (options.vulkan_semantics && is_workgroup_size_constant) + { + // Vulkan GLSL does not need to declare workgroup spec constants explicitly, it is handled in layout(). + return; + } + else if (!options.vulkan_semantics && is_workgroup_size_constant && + !has_decoration(constant.self, DecorationSpecId)) + { + // Only bother declaring a workgroup size if it is actually a specialization constant, because we need macros. + return; + } + + add_resource_name(constant.self); + auto name = to_name(constant.self); + + // Only scalars have constant IDs. + if (has_decoration(constant.self, DecorationSpecId)) + { + if (options.vulkan_semantics) + { + statement("layout(constant_id = ", get_decoration(constant.self, DecorationSpecId), ") const ", + variable_decl(type, name), " = ", constant_expression(constant), ";"); + } + else + { + const string ¯o_name = constant.specialization_constant_macro_name; + statement("#ifndef ", macro_name); + statement("#define ", macro_name, " ", constant_expression(constant)); + statement("#endif"); + + // For workgroup size constants, only emit the macros. + if (!is_workgroup_size_constant) + statement("const ", variable_decl(type, name), " = ", macro_name, ";"); + } + } + else + { + statement("const ", variable_decl(type, name), " = ", constant_expression(constant), ";"); + } +} + +void CompilerGLSL::emit_entry_point_declarations() +{ +} + +void CompilerGLSL::replace_illegal_names(const unordered_set<string> &keywords) +{ + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { + if (is_hidden_variable(var)) + return; + + auto *meta = ir.find_meta(var.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + }); + + ir.for_each_typed_id<SPIRFunction>([&](uint32_t, const SPIRFunction &func) { + auto *meta = ir.find_meta(func.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + }); + + ir.for_each_typed_id<SPIRType>([&](uint32_t, const SPIRType &type) { + auto *meta = ir.find_meta(type.self); + if (!meta) + return; + + auto &m = meta->decoration; + if (keywords.find(m.alias) != end(keywords)) + m.alias = join("_", m.alias); + + for (auto &memb : meta->members) + if (keywords.find(memb.alias) != end(keywords)) + memb.alias = join("_", memb.alias); + }); +} + +void CompilerGLSL::replace_illegal_names() +{ + // clang-format off + static const unordered_set<string> keywords = { + "abs", "acos", "acosh", "all", "any", "asin", "asinh", "atan", "atanh", + "atomicAdd", "atomicCompSwap", "atomicCounter", "atomicCounterDecrement", "atomicCounterIncrement", + "atomicExchange", "atomicMax", "atomicMin", "atomicOr", "atomicXor", + "bitCount", "bitfieldExtract", "bitfieldInsert", "bitfieldReverse", + "ceil", "cos", "cosh", "cross", "degrees", + "dFdx", "dFdxCoarse", "dFdxFine", + "dFdy", "dFdyCoarse", "dFdyFine", + "distance", "dot", "EmitStreamVertex", "EmitVertex", "EndPrimitive", "EndStreamPrimitive", "equal", "exp", "exp2", + "faceforward", "findLSB", "findMSB", "float16BitsToInt16", "float16BitsToUint16", "floatBitsToInt", "floatBitsToUint", "floor", "fma", "fract", + "frexp", "fwidth", "fwidthCoarse", "fwidthFine", + "greaterThan", "greaterThanEqual", "groupMemoryBarrier", + "imageAtomicAdd", "imageAtomicAnd", "imageAtomicCompSwap", "imageAtomicExchange", "imageAtomicMax", "imageAtomicMin", "imageAtomicOr", "imageAtomicXor", + "imageLoad", "imageSamples", "imageSize", "imageStore", "imulExtended", "int16BitsToFloat16", "intBitsToFloat", "interpolateAtOffset", "interpolateAtCentroid", "interpolateAtSample", + "inverse", "inversesqrt", "isinf", "isnan", "ldexp", "length", "lessThan", "lessThanEqual", "log", "log2", + "matrixCompMult", "max", "memoryBarrier", "memoryBarrierAtomicCounter", "memoryBarrierBuffer", "memoryBarrierImage", "memoryBarrierShared", + "min", "mix", "mod", "modf", "noise", "noise1", "noise2", "noise3", "noise4", "normalize", "not", "notEqual", + "outerProduct", "packDouble2x32", "packHalf2x16", "packInt2x16", "packInt4x16", "packSnorm2x16", "packSnorm4x8", + "packUint2x16", "packUint4x16", "packUnorm2x16", "packUnorm4x8", "pow", + "radians", "reflect", "refract", "round", "roundEven", "sign", "sin", "sinh", "smoothstep", "sqrt", "step", + "tan", "tanh", "texelFetch", "texelFetchOffset", "texture", "textureGather", "textureGatherOffset", "textureGatherOffsets", + "textureGrad", "textureGradOffset", "textureLod", "textureLodOffset", "textureOffset", "textureProj", "textureProjGrad", + "textureProjGradOffset", "textureProjLod", "textureProjLodOffset", "textureProjOffset", "textureQueryLevels", "textureQueryLod", "textureSamples", "textureSize", + "transpose", "trunc", "uaddCarry", "uint16BitsToFloat16", "uintBitsToFloat", "umulExtended", "unpackDouble2x32", "unpackHalf2x16", "unpackInt2x16", "unpackInt4x16", + "unpackSnorm2x16", "unpackSnorm4x8", "unpackUint2x16", "unpackUint4x16", "unpackUnorm2x16", "unpackUnorm4x8", "usubBorrow", + + "active", "asm", "atomic_uint", "attribute", "bool", "break", "buffer", + "bvec2", "bvec3", "bvec4", "case", "cast", "centroid", "class", "coherent", "common", "const", "continue", "default", "discard", + "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4", + "do", "double", "dvec2", "dvec3", "dvec4", "else", "enum", "extern", "external", "false", "filter", "fixed", "flat", "float", + "for", "fvec2", "fvec3", "fvec4", "goto", "half", "highp", "hvec2", "hvec3", "hvec4", "if", "iimage1D", "iimage1DArray", + "iimage2D", "iimage2DArray", "iimage2DMS", "iimage2DMSArray", "iimage2DRect", "iimage3D", "iimageBuffer", "iimageCube", + "iimageCubeArray", "image1D", "image1DArray", "image2D", "image2DArray", "image2DMS", "image2DMSArray", "image2DRect", + "image3D", "imageBuffer", "imageCube", "imageCubeArray", "in", "inline", "inout", "input", "int", "interface", "invariant", + "isampler1D", "isampler1DArray", "isampler2D", "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect", + "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", "ivec2", "ivec3", "ivec4", "layout", "long", "lowp", + "mat2", "mat2x2", "mat2x3", "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", "mat4x4", "mediump", + "namespace", "noinline", "noperspective", "out", "output", "packed", "partition", "patch", "precise", "precision", "public", "readonly", + "resource", "restrict", "return", "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow", + "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", "sampler2DMS", "sampler2DMSArray", + "sampler2DRect", "sampler2DRectShadow", "sampler2DShadow", "sampler3D", "sampler3DRect", "samplerBuffer", + "samplerCube", "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "shared", "short", "sizeof", "smooth", "static", + "struct", "subroutine", "superp", "switch", "template", "this", "true", "typedef", "uimage1D", "uimage1DArray", "uimage2D", + "uimage2DArray", "uimage2DMS", "uimage2DMSArray", "uimage2DRect", "uimage3D", "uimageBuffer", "uimageCube", + "uimageCubeArray", "uint", "uniform", "union", "unsigned", "usampler1D", "usampler1DArray", "usampler2D", "usampler2DArray", + "usampler2DMS", "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer", "usamplerCube", + "usamplerCubeArray", "using", "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", "volatile", + "while", "writeonly", + }; + // clang-format on + + replace_illegal_names(keywords); +} + +void CompilerGLSL::replace_fragment_output(SPIRVariable &var) +{ + auto &m = ir.meta[var.self].decoration; + uint32_t location = 0; + if (m.decoration_flags.get(DecorationLocation)) + location = m.location; + + // If our variable is arrayed, we must not emit the array part of this as the SPIR-V will + // do the access chain part of this for us. + auto &type = get<SPIRType>(var.basetype); + + if (type.array.empty()) + { + // Redirect the write to a specific render target in legacy GLSL. + m.alias = join("gl_FragData[", location, "]"); + + if (is_legacy_es() && location != 0) + require_extension_internal("GL_EXT_draw_buffers"); + } + else if (type.array.size() == 1) + { + // If location is non-zero, we probably have to add an offset. + // This gets really tricky since we'd have to inject an offset in the access chain. + // FIXME: This seems like an extremely odd-ball case, so it's probably fine to leave it like this for now. + m.alias = "gl_FragData"; + if (location != 0) + SPIRV_CROSS_THROW("Arrayed output variable used, but location is not 0. " + "This is unimplemented in SPIRV-Cross."); + + if (is_legacy_es()) + require_extension_internal("GL_EXT_draw_buffers"); + } + else + SPIRV_CROSS_THROW("Array-of-array output variable used. This cannot be implemented in legacy GLSL."); + + var.compat_builtin = true; // We don't want to declare this variable, but use the name as-is. +} + +void CompilerGLSL::replace_fragment_outputs() +{ + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + + if (!is_builtin_variable(var) && !var.remapped_variable && type.pointer && var.storage == StorageClassOutput) + replace_fragment_output(var); + }); +} + +string CompilerGLSL::remap_swizzle(const SPIRType &out_type, uint32_t input_components, const string &expr) +{ + if (out_type.vecsize == input_components) + return expr; + else if (input_components == 1 && !backend.can_swizzle_scalar) + return join(type_to_glsl(out_type), "(", expr, ")"); + else + { + // FIXME: This will not work with packed expressions. + auto e = enclose_expression(expr) + "."; + // Just clamp the swizzle index if we have more outputs than inputs. + for (uint32_t c = 0; c < out_type.vecsize; c++) + e += index_to_swizzle(min(c, input_components - 1)); + if (backend.swizzle_is_function && out_type.vecsize > 1) + e += "()"; + + remove_duplicate_swizzle(e); + return e; + } +} + +void CompilerGLSL::emit_pls() +{ + auto &execution = get_entry_point(); + if (execution.model != ExecutionModelFragment) + SPIRV_CROSS_THROW("Pixel local storage only supported in fragment shaders."); + + if (!options.es) + SPIRV_CROSS_THROW("Pixel local storage only supported in OpenGL ES."); + + if (options.version < 300) + SPIRV_CROSS_THROW("Pixel local storage only supported in ESSL 3.0 and above."); + + if (!pls_inputs.empty()) + { + statement("__pixel_local_inEXT _PLSIn"); + begin_scope(); + for (auto &input : pls_inputs) + statement(pls_decl(input), ";"); + end_scope_decl(); + statement(""); + } + + if (!pls_outputs.empty()) + { + statement("__pixel_local_outEXT _PLSOut"); + begin_scope(); + for (auto &output : pls_outputs) + statement(pls_decl(output), ";"); + end_scope_decl(); + statement(""); + } +} + +void CompilerGLSL::fixup_image_load_store_access() +{ + if (!options.enable_storage_image_qualifier_deduction) + return; + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t var, const SPIRVariable &) { + auto &vartype = expression_type(var); + if (vartype.basetype == SPIRType::Image && vartype.image.sampled == 2) + { + // Very old glslangValidator and HLSL compilers do not emit required qualifiers here. + // Solve this by making the image access as restricted as possible and loosen up if we need to. + // If any no-read/no-write flags are actually set, assume that the compiler knows what it's doing. + + if (!has_decoration(var, DecorationNonWritable) && !has_decoration(var, DecorationNonReadable)) + { + set_decoration(var, DecorationNonWritable); + set_decoration(var, DecorationNonReadable); + } + } + }); +} + +static bool is_block_builtin(BuiltIn builtin) +{ + return builtin == BuiltInPosition || builtin == BuiltInPointSize || builtin == BuiltInClipDistance || + builtin == BuiltInCullDistance; +} + +bool CompilerGLSL::should_force_emit_builtin_block(StorageClass storage) +{ + // If the builtin block uses XFB, we need to force explicit redeclaration of the builtin block. + + if (storage != StorageClassOutput) + return false; + bool should_force = false; + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + if (should_force) + return; + + auto &type = this->get<SPIRType>(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if (var.storage == storage && block && is_builtin_variable(var)) + { + uint32_t member_count = uint32_t(type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationBuiltIn) && + is_block_builtin(BuiltIn(get_member_decoration(type.self, i, DecorationBuiltIn))) && + has_member_decoration(type.self, i, DecorationOffset)) + { + should_force = true; + } + } + } + else if (var.storage == storage && !block && is_builtin_variable(var)) + { + if (is_block_builtin(BuiltIn(get_decoration(type.self, DecorationBuiltIn))) && + has_decoration(var.self, DecorationOffset)) + { + should_force = true; + } + } + }); + + // If we're declaring clip/cull planes with control points we need to force block declaration. + if ((get_execution_model() == ExecutionModelTessellationControl || + get_execution_model() == ExecutionModelMeshEXT) && + (clip_distance_count || cull_distance_count)) + { + should_force = true; + } + + // Either glslang bug or oversight, but global invariant position does not work in mesh shaders. + if (get_execution_model() == ExecutionModelMeshEXT && position_invariant) + should_force = true; + + return should_force; +} + +void CompilerGLSL::fixup_implicit_builtin_block_names(ExecutionModel model) +{ + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + if ((var.storage == StorageClassOutput || var.storage == StorageClassInput) && block && + is_builtin_variable(var)) + { + if (model != ExecutionModelMeshEXT) + { + // Make sure the array has a supported name in the code. + if (var.storage == StorageClassOutput) + set_name(var.self, "gl_out"); + else if (var.storage == StorageClassInput) + set_name(var.self, "gl_in"); + } + else + { + auto flags = get_buffer_block_flags(var.self); + if (flags.get(DecorationPerPrimitiveEXT)) + { + set_name(var.self, "gl_MeshPrimitivesEXT"); + set_name(type.self, "gl_MeshPerPrimitiveEXT"); + } + else + { + set_name(var.self, "gl_MeshVerticesEXT"); + set_name(type.self, "gl_MeshPerVertexEXT"); + } + } + } + + if (model == ExecutionModelMeshEXT && var.storage == StorageClassOutput && !block) + { + auto *m = ir.find_meta(var.self); + if (m && m->decoration.builtin) + { + auto builtin_type = m->decoration.builtin_type; + if (builtin_type == BuiltInPrimitivePointIndicesEXT) + set_name(var.self, "gl_PrimitivePointIndicesEXT"); + else if (builtin_type == BuiltInPrimitiveLineIndicesEXT) + set_name(var.self, "gl_PrimitiveLineIndicesEXT"); + else if (builtin_type == BuiltInPrimitiveTriangleIndicesEXT) + set_name(var.self, "gl_PrimitiveTriangleIndicesEXT"); + } + } + }); +} + +void CompilerGLSL::emit_declared_builtin_block(StorageClass storage, ExecutionModel model) +{ + Bitset emitted_builtins; + Bitset global_builtins; + const SPIRVariable *block_var = nullptr; + bool emitted_block = false; + + // Need to use declared size in the type. + // These variables might have been declared, but not statically used, so we haven't deduced their size yet. + uint32_t cull_distance_size = 0; + uint32_t clip_distance_size = 0; + + bool have_xfb_buffer_stride = false; + bool have_geom_stream = false; + bool have_any_xfb_offset = false; + uint32_t xfb_stride = 0, xfb_buffer = 0, geom_stream = 0; + std::unordered_map<uint32_t, uint32_t> builtin_xfb_offsets; + + const auto builtin_is_per_vertex_set = [](BuiltIn builtin) -> bool { + return builtin == BuiltInPosition || builtin == BuiltInPointSize || + builtin == BuiltInClipDistance || builtin == BuiltInCullDistance; + }; + + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + bool block = has_decoration(type.self, DecorationBlock); + Bitset builtins; + + if (var.storage == storage && block && is_builtin_variable(var)) + { + uint32_t index = 0; + for (auto &m : ir.meta[type.self].members) + { + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) + { + builtins.set(m.builtin_type); + if (m.builtin_type == BuiltInCullDistance) + cull_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index])); + else if (m.builtin_type == BuiltInClipDistance) + clip_distance_size = to_array_size_literal(this->get<SPIRType>(type.member_types[index])); + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationOffset)) + { + have_any_xfb_offset = true; + builtin_xfb_offsets[m.builtin_type] = m.offset; + } + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = m.stream; + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } + } + index++; + } + + if (storage == StorageClassOutput && has_decoration(var.self, DecorationXfbBuffer) && + has_decoration(var.self, DecorationXfbStride)) + { + uint32_t buffer_index = get_decoration(var.self, DecorationXfbBuffer); + uint32_t stride = get_decoration(var.self, DecorationXfbStride); + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + xfb_stride = stride; + } + + if (storage == StorageClassOutput && has_decoration(var.self, DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } + } + else if (var.storage == storage && !block && is_builtin_variable(var)) + { + // While we're at it, collect all declared global builtins (HLSL mostly ...). + auto &m = ir.meta[var.self].decoration; + if (m.builtin && builtin_is_per_vertex_set(m.builtin_type)) + { + // For mesh/tesc output, Clip/Cull is an array-of-array. Look at innermost array type + // for correct result. + global_builtins.set(m.builtin_type); + if (m.builtin_type == BuiltInCullDistance) + cull_distance_size = to_array_size_literal(type, 0); + else if (m.builtin_type == BuiltInClipDistance) + clip_distance_size = to_array_size_literal(type, 0); + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationXfbStride) && + m.decoration_flags.get(DecorationXfbBuffer) && m.decoration_flags.get(DecorationOffset)) + { + have_any_xfb_offset = true; + builtin_xfb_offsets[m.builtin_type] = m.offset; + uint32_t buffer_index = m.xfb_buffer; + uint32_t stride = m.xfb_stride; + if (have_xfb_buffer_stride && buffer_index != xfb_buffer) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + if (have_xfb_buffer_stride && stride != xfb_stride) + SPIRV_CROSS_THROW("IO block member XfbBuffer mismatch."); + have_xfb_buffer_stride = true; + xfb_buffer = buffer_index; + xfb_stride = stride; + } + + if (is_block_builtin(m.builtin_type) && m.decoration_flags.get(DecorationStream)) + { + uint32_t stream = get_decoration(var.self, DecorationStream); + if (have_geom_stream && geom_stream != stream) + SPIRV_CROSS_THROW("IO block member Stream mismatch."); + have_geom_stream = true; + geom_stream = stream; + } + } + } + + if (builtins.empty()) + return; + + if (emitted_block) + SPIRV_CROSS_THROW("Cannot use more than one builtin I/O block."); + + emitted_builtins = builtins; + emitted_block = true; + block_var = &var; + }); + + global_builtins = + Bitset(global_builtins.get_lower() & ((1ull << BuiltInPosition) | (1ull << BuiltInPointSize) | + (1ull << BuiltInClipDistance) | (1ull << BuiltInCullDistance))); + + // Try to collect all other declared builtins. + if (!emitted_block) + emitted_builtins = global_builtins; + + // Can't declare an empty interface block. + if (emitted_builtins.empty()) + return; + + if (storage == StorageClassOutput) + { + SmallVector<string> attr; + if (have_xfb_buffer_stride && have_any_xfb_offset) + { + if (!options.es) + { + if (options.version < 440 && options.version >= 140) + require_extension_internal("GL_ARB_enhanced_layouts"); + else if (options.version < 140) + SPIRV_CROSS_THROW("Component decoration is not supported in targets below GLSL 1.40."); + if (!options.es && options.version < 440) + require_extension_internal("GL_ARB_enhanced_layouts"); + } + else if (options.es) + SPIRV_CROSS_THROW("Need GL_ARB_enhanced_layouts for xfb_stride or xfb_buffer."); + attr.push_back(join("xfb_buffer = ", xfb_buffer, ", xfb_stride = ", xfb_stride)); + } + + if (have_geom_stream) + { + if (get_execution_model() != ExecutionModelGeometry) + SPIRV_CROSS_THROW("Geometry streams can only be used in geometry shaders."); + if (options.es) + SPIRV_CROSS_THROW("Multiple geometry streams not supported in ESSL."); + if (options.version < 400) + require_extension_internal("GL_ARB_transform_feedback3"); + attr.push_back(join("stream = ", geom_stream)); + } + + if (model == ExecutionModelMeshEXT) + statement("out gl_MeshPerVertexEXT"); + else if (!attr.empty()) + statement("layout(", merge(attr), ") out gl_PerVertex"); + else + statement("out gl_PerVertex"); + } + else + { + // If we have passthrough, there is no way PerVertex cannot be passthrough. + if (get_entry_point().geometry_passthrough) + statement("layout(passthrough) in gl_PerVertex"); + else + statement("in gl_PerVertex"); + } + + begin_scope(); + if (emitted_builtins.get(BuiltInPosition)) + { + auto itr = builtin_xfb_offsets.find(BuiltInPosition); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") vec4 gl_Position;"); + else if (position_invariant) + statement("invariant vec4 gl_Position;"); + else + statement("vec4 gl_Position;"); + } + + if (emitted_builtins.get(BuiltInPointSize)) + { + auto itr = builtin_xfb_offsets.find(BuiltInPointSize); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_PointSize;"); + else + statement("float gl_PointSize;"); + } + + if (emitted_builtins.get(BuiltInClipDistance)) + { + auto itr = builtin_xfb_offsets.find(BuiltInClipDistance); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_ClipDistance[", clip_distance_size, "];"); + else + statement("float gl_ClipDistance[", clip_distance_size, "];"); + } + + if (emitted_builtins.get(BuiltInCullDistance)) + { + auto itr = builtin_xfb_offsets.find(BuiltInCullDistance); + if (itr != end(builtin_xfb_offsets)) + statement("layout(xfb_offset = ", itr->second, ") float gl_CullDistance[", cull_distance_size, "];"); + else + statement("float gl_CullDistance[", cull_distance_size, "];"); + } + + bool builtin_array = model == ExecutionModelTessellationControl || + (model == ExecutionModelMeshEXT && storage == StorageClassOutput) || + (model == ExecutionModelGeometry && storage == StorageClassInput) || + (model == ExecutionModelTessellationEvaluation && storage == StorageClassInput); + + if (builtin_array) + { + const char *instance_name; + if (model == ExecutionModelMeshEXT) + instance_name = "gl_MeshVerticesEXT"; // Per primitive is never synthesized. + else + instance_name = storage == StorageClassInput ? "gl_in" : "gl_out"; + + if (model == ExecutionModelTessellationControl && storage == StorageClassOutput) + end_scope_decl(join(instance_name, "[", get_entry_point().output_vertices, "]")); + else + end_scope_decl(join(instance_name, "[]")); + } + else + end_scope_decl(); + statement(""); +} + +bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const +{ + bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable; + + if (statically_assigned) + { + auto *constant = maybe_get<SPIRConstant>(var.static_expression); + if (constant && constant->is_used_as_lut) + return true; + } + + return false; +} + +void CompilerGLSL::emit_resources() +{ + auto &execution = get_entry_point(); + + replace_illegal_names(); + + // Legacy GL uses gl_FragData[], redeclare all fragment outputs + // with builtins. + if (execution.model == ExecutionModelFragment && is_legacy()) + replace_fragment_outputs(); + + // Emit PLS blocks if we have such variables. + if (!pls_inputs.empty() || !pls_outputs.empty()) + emit_pls(); + + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + case ExecutionModelMeshEXT: + fixup_implicit_builtin_block_names(execution.model); + break; + + default: + break; + } + + bool global_invariant_position = position_invariant && (options.es || options.version >= 120); + + // Emit custom gl_PerVertex for SSO compatibility. + if (options.separate_shader_objects && !options.es && execution.model != ExecutionModelFragment) + { + switch (execution.model) + { + case ExecutionModelGeometry: + case ExecutionModelTessellationControl: + case ExecutionModelTessellationEvaluation: + emit_declared_builtin_block(StorageClassInput, execution.model); + emit_declared_builtin_block(StorageClassOutput, execution.model); + global_invariant_position = false; + break; + + case ExecutionModelVertex: + case ExecutionModelMeshEXT: + emit_declared_builtin_block(StorageClassOutput, execution.model); + global_invariant_position = false; + break; + + default: + break; + } + } + else if (should_force_emit_builtin_block(StorageClassOutput)) + { + emit_declared_builtin_block(StorageClassOutput, execution.model); + global_invariant_position = false; + } + else if (execution.geometry_passthrough) + { + // Need to declare gl_in with Passthrough. + // If we're doing passthrough, we cannot emit an output block, so the output block test above will never pass. + emit_declared_builtin_block(StorageClassInput, execution.model); + } + else + { + // Need to redeclare clip/cull distance with explicit size to use them. + // SPIR-V mandates these builtins have a size declared. + const char *storage = execution.model == ExecutionModelFragment ? "in" : "out"; + if (clip_distance_count != 0) + statement(storage, " float gl_ClipDistance[", clip_distance_count, "];"); + if (cull_distance_count != 0) + statement(storage, " float gl_CullDistance[", cull_distance_count, "];"); + if (clip_distance_count != 0 || cull_distance_count != 0) + statement(""); + } + + if (global_invariant_position) + { + statement("invariant gl_Position;"); + statement(""); + } + + bool emitted = false; + + // If emitted Vulkan GLSL, + // emit specialization constants as actual floats, + // spec op expressions will redirect to the constant name. + // + { + auto loop_lock = ir.create_loop_hard_lock(); + for (auto &id_ : ir.ids_for_constant_undef_or_type) + { + auto &id = ir.ids[id_]; + + // Skip declaring any bogus constants or undefs which use block types. + // We don't declare block types directly, so this will never work. + // Should not be legal SPIR-V, so this is considered a workaround. + + if (id.get_type() == TypeConstant) + { + auto &c = id.get<SPIRConstant>(); + + bool needs_declaration = c.specialization || c.is_used_as_lut; + + if (needs_declaration) + { + if (!options.vulkan_semantics && c.specialization) + { + c.specialization_constant_macro_name = + constant_value_macro_name(get_decoration(c.self, DecorationSpecId)); + } + emit_constant(c); + emitted = true; + } + } + else if (id.get_type() == TypeConstantOp) + { + emit_specialization_constant_op(id.get<SPIRConstantOp>()); + emitted = true; + } + else if (id.get_type() == TypeType) + { + auto *type = &id.get<SPIRType>(); + + bool is_natural_struct = type->basetype == SPIRType::Struct && type->array.empty() && !type->pointer && + (!has_decoration(type->self, DecorationBlock) && + !has_decoration(type->self, DecorationBufferBlock)); + + // Special case, ray payload and hit attribute blocks are not really blocks, just regular structs. + if (type->basetype == SPIRType::Struct && type->pointer && + has_decoration(type->self, DecorationBlock) && + (type->storage == StorageClassRayPayloadKHR || type->storage == StorageClassIncomingRayPayloadKHR || + type->storage == StorageClassHitAttributeKHR)) + { + type = &get<SPIRType>(type->parent_type); + is_natural_struct = true; + } + + if (is_natural_struct) + { + if (emitted) + statement(""); + emitted = false; + + emit_struct(*type); + } + } + else if (id.get_type() == TypeUndef) + { + auto &undef = id.get<SPIRUndef>(); + auto &type = this->get<SPIRType>(undef.basetype); + // OpUndef can be void for some reason ... + if (type.basetype == SPIRType::Void) + return; + + // This will break. It is bogus and should not be legal. + if (type_is_top_level_block(type)) + return; + + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(undef.basetype)); + + // FIXME: If used in a constant, we must declare it as one. + statement(variable_decl(type, to_name(undef.self), undef.self), initializer, ";"); + emitted = true; + } + } + } + + if (emitted) + statement(""); + + // If we needed to declare work group size late, check here. + // If the work group size depends on a specialization constant, we need to declare the layout() block + // after constants (and their macros) have been declared. + if (execution.model == ExecutionModelGLCompute && !options.vulkan_semantics && + (execution.workgroup_size.constant != 0 || execution.flags.get(ExecutionModeLocalSizeId))) + { + SpecializationConstant wg_x, wg_y, wg_z; + get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + + if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0))) + { + SmallVector<string> inputs; + build_workgroup_size(inputs, wg_x, wg_y, wg_z); + statement("layout(", merge(inputs), ") in;"); + statement(""); + } + } + + emitted = false; + + if (ir.addressing_model == AddressingModelPhysicalStorageBuffer64EXT) + { + // Output buffer reference blocks. + // Do this in two stages, one with forward declaration, + // and one without. Buffer reference blocks can reference themselves + // to support things like linked lists. + ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) { + if (is_physical_pointer(type)) + { + bool emit_type = true; + if (!is_physical_pointer_to_buffer_block(type)) + { + // Only forward-declare if we intend to emit it in the non_block_pointer types. + // Otherwise, these are just "benign" pointer types that exist as a result of access chains. + emit_type = std::find(physical_storage_non_block_pointer_types.begin(), + physical_storage_non_block_pointer_types.end(), + id) != physical_storage_non_block_pointer_types.end(); + } + + if (emit_type) + emit_buffer_reference_block(id, true); + } + }); + + for (auto type : physical_storage_non_block_pointer_types) + emit_buffer_reference_block(type, false); + + ir.for_each_typed_id<SPIRType>([&](uint32_t id, SPIRType &type) { + if (is_physical_pointer_to_buffer_block(type)) + emit_buffer_reference_block(id, false); + }); + } + + // Output UBOs and SSBOs + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + + bool is_block_storage = type.storage == StorageClassStorageBuffer || type.storage == StorageClassUniform || + type.storage == StorageClassShaderRecordBufferKHR; + bool has_block_flags = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (var.storage != StorageClassFunction && type.pointer && is_block_storage && !is_hidden_variable(var) && + has_block_flags) + { + emit_buffer_block(var); + } + }); + + // Output push constant blocks + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + if (var.storage != StorageClassFunction && type.pointer && type.storage == StorageClassPushConstant && + !is_hidden_variable(var)) + { + emit_push_constant_block(var); + } + }); + + bool skip_separate_image_sampler = !combined_image_samplers.empty() || !options.vulkan_semantics; + + // Output Uniform Constants (values, samplers, images, etc). + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + + // If we're remapping separate samplers and images, only emit the combined samplers. + if (skip_separate_image_sampler) + { + // Sampler buffers are always used without a sampler, and they will also work in regular GL. + bool sampler_buffer = type.basetype == SPIRType::Image && type.image.dim == DimBuffer; + bool separate_image = type.basetype == SPIRType::Image && type.image.sampled == 1; + bool separate_sampler = type.basetype == SPIRType::Sampler; + if (!sampler_buffer && (separate_image || separate_sampler)) + return; + } + + if (var.storage != StorageClassFunction && type.pointer && + (type.storage == StorageClassUniformConstant || type.storage == StorageClassAtomicCounter || + type.storage == StorageClassRayPayloadKHR || type.storage == StorageClassIncomingRayPayloadKHR || + type.storage == StorageClassCallableDataKHR || type.storage == StorageClassIncomingCallableDataKHR || + type.storage == StorageClassHitAttributeKHR) && + !is_hidden_variable(var)) + { + emit_uniform(var); + emitted = true; + } + }); + + if (emitted) + statement(""); + emitted = false; + + bool emitted_base_instance = false; + + // Output in/out interfaces. + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, SPIRVariable &var) { + auto &type = this->get<SPIRType>(var.basetype); + + bool is_hidden = is_hidden_variable(var); + + // Unused output I/O variables might still be required to implement framebuffer fetch. + if (var.storage == StorageClassOutput && !is_legacy() && + location_is_framebuffer_fetch(get_decoration(var.self, DecorationLocation)) != 0) + { + is_hidden = false; + } + + if (var.storage != StorageClassFunction && type.pointer && + (var.storage == StorageClassInput || var.storage == StorageClassOutput) && + interface_variable_exists_in_entry_point(var.self) && !is_hidden) + { + if (options.es && get_execution_model() == ExecutionModelVertex && var.storage == StorageClassInput && + type.array.size() == 1) + { + SPIRV_CROSS_THROW("OpenGL ES doesn't support array input variables in vertex shader."); + } + emit_interface_block(var); + emitted = true; + } + else if (is_builtin_variable(var)) + { + auto builtin = BuiltIn(get_decoration(var.self, DecorationBuiltIn)); + // For gl_InstanceIndex emulation on GLES, the API user needs to + // supply this uniform. + + // The draw parameter extension is soft-enabled on GL with some fallbacks. + if (!options.vulkan_semantics) + { + if (!emitted_base_instance && + ((options.vertex.support_nonzero_base_instance && builtin == BuiltInInstanceIndex) || + (builtin == BuiltInBaseInstance))) + { + statement("#ifdef GL_ARB_shader_draw_parameters"); + statement("#define SPIRV_Cross_BaseInstance gl_BaseInstanceARB"); + statement("#else"); + // A crude, but simple workaround which should be good enough for non-indirect draws. + statement("uniform int SPIRV_Cross_BaseInstance;"); + statement("#endif"); + emitted = true; + emitted_base_instance = true; + } + else if (builtin == BuiltInBaseVertex) + { + statement("#ifdef GL_ARB_shader_draw_parameters"); + statement("#define SPIRV_Cross_BaseVertex gl_BaseVertexARB"); + statement("#else"); + // A crude, but simple workaround which should be good enough for non-indirect draws. + statement("uniform int SPIRV_Cross_BaseVertex;"); + statement("#endif"); + } + else if (builtin == BuiltInDrawIndex) + { + statement("#ifndef GL_ARB_shader_draw_parameters"); + // Cannot really be worked around. + statement("#error GL_ARB_shader_draw_parameters is not supported."); + statement("#endif"); + } + } + } + }); + + // Global variables. + for (auto global : global_variables) + { + auto &var = get<SPIRVariable>(global); + if (is_hidden_variable(var, true)) + continue; + + if (var.storage != StorageClassOutput) + { + if (!variable_is_lut(var)) + { + add_resource_name(var.self); + + string initializer; + if (options.force_zero_initialized_variables && var.storage == StorageClassPrivate && + !var.initializer && !var.static_expression && type_can_zero_initialize(get_variable_data_type(var))) + { + initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(var))); + } + + statement(variable_decl(var), initializer, ";"); + emitted = true; + } + } + else if (var.initializer && maybe_get<SPIRConstant>(var.initializer) != nullptr) + { + emit_output_variable_initializer(var); + } + } + + if (emitted) + statement(""); +} + +void CompilerGLSL::emit_output_variable_initializer(const SPIRVariable &var) +{ + // If a StorageClassOutput variable has an initializer, we need to initialize it in main(). + auto &entry_func = this->get<SPIRFunction>(ir.default_entry_point); + auto &type = get<SPIRType>(var.basetype); + bool is_patch = has_decoration(var.self, DecorationPatch); + bool is_block = has_decoration(type.self, DecorationBlock); + bool is_control_point = get_execution_model() == ExecutionModelTessellationControl && !is_patch; + + if (is_block) + { + uint32_t member_count = uint32_t(type.member_types.size()); + bool type_is_array = type.array.size() == 1; + uint32_t array_size = 1; + if (type_is_array) + array_size = to_array_size_literal(type); + uint32_t iteration_count = is_control_point ? 1 : array_size; + + // If the initializer is a block, we must initialize each block member one at a time. + for (uint32_t i = 0; i < member_count; i++) + { + // These outputs might not have been properly declared, so don't initialize them in that case. + if (has_member_decoration(type.self, i, DecorationBuiltIn)) + { + if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInCullDistance && + !cull_distance_count) + continue; + + if (get_member_decoration(type.self, i, DecorationBuiltIn) == BuiltInClipDistance && + !clip_distance_count) + continue; + } + + // We need to build a per-member array first, essentially transposing from AoS to SoA. + // This code path hits when we have an array of blocks. + string lut_name; + if (type_is_array) + { + lut_name = join("_", var.self, "_", i, "_init"); + uint32_t member_type_id = get<SPIRType>(var.basetype).member_types[i]; + auto &member_type = get<SPIRType>(member_type_id); + auto array_type = member_type; + array_type.parent_type = member_type_id; + array_type.op = OpTypeArray; + array_type.array.push_back(array_size); + array_type.array_size_literal.push_back(true); + + SmallVector<string> exprs; + exprs.reserve(array_size); + auto &c = get<SPIRConstant>(var.initializer); + for (uint32_t j = 0; j < array_size; j++) + exprs.push_back(to_expression(get<SPIRConstant>(c.subconstants[j]).subconstants[i])); + statement("const ", type_to_glsl(array_type), " ", lut_name, type_to_array_glsl(array_type, 0), " = ", + type_to_glsl_constructor(array_type), "(", merge(exprs, ", "), ");"); + } + + for (uint32_t j = 0; j < iteration_count; j++) + { + entry_func.fixup_hooks_in.push_back([=, &var]() { + AccessChainMeta meta; + auto &c = this->get<SPIRConstant>(var.initializer); + + uint32_t invocation_id = 0; + uint32_t member_index_id = 0; + if (is_control_point) + { + uint32_t ids = ir.increase_bound_by(3); + auto &uint_type = set<SPIRType>(ids, OpTypeInt); + uint_type.basetype = SPIRType::UInt; + uint_type.width = 32; + set<SPIRExpression>(ids + 1, builtin_to_glsl(BuiltInInvocationId, StorageClassInput), ids, true); + set<SPIRConstant>(ids + 2, ids, i, false); + invocation_id = ids + 1; + member_index_id = ids + 2; + } + + if (is_patch) + { + statement("if (gl_InvocationID == 0)"); + begin_scope(); + } + + if (type_is_array && !is_control_point) + { + uint32_t indices[2] = { j, i }; + auto chain = access_chain_internal(var.self, indices, 2, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); + statement(chain, " = ", lut_name, "[", j, "];"); + } + else if (is_control_point) + { + uint32_t indices[2] = { invocation_id, member_index_id }; + auto chain = access_chain_internal(var.self, indices, 2, 0, &meta); + statement(chain, " = ", lut_name, "[", builtin_to_glsl(BuiltInInvocationId, StorageClassInput), "];"); + } + else + { + auto chain = + access_chain_internal(var.self, &i, 1, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &meta); + statement(chain, " = ", to_expression(c.subconstants[i]), ";"); + } + + if (is_patch) + end_scope(); + }); + } + } + } + else if (is_control_point) + { + auto lut_name = join("_", var.self, "_init"); + statement("const ", type_to_glsl(type), " ", lut_name, type_to_array_glsl(type, 0), + " = ", to_expression(var.initializer), ";"); + entry_func.fixup_hooks_in.push_back([&, lut_name]() { + statement(to_expression(var.self), "[gl_InvocationID] = ", lut_name, "[gl_InvocationID];"); + }); + } + else if (has_decoration(var.self, DecorationBuiltIn) && + BuiltIn(get_decoration(var.self, DecorationBuiltIn)) == BuiltInSampleMask) + { + // We cannot copy the array since gl_SampleMask is unsized in GLSL. Unroll time! <_< + entry_func.fixup_hooks_in.push_back([&] { + auto &c = this->get<SPIRConstant>(var.initializer); + uint32_t num_constants = uint32_t(c.subconstants.size()); + for (uint32_t i = 0; i < num_constants; i++) + { + // Don't use to_expression on constant since it might be uint, just fish out the raw int. + statement(to_expression(var.self), "[", i, "] = ", + convert_to_string(this->get<SPIRConstant>(c.subconstants[i]).scalar_i32()), ";"); + } + }); + } + else + { + auto lut_name = join("_", var.self, "_init"); + statement("const ", type_to_glsl(type), " ", lut_name, + type_to_array_glsl(type, var.self), " = ", to_expression(var.initializer), ";"); + entry_func.fixup_hooks_in.push_back([&, lut_name, is_patch]() { + if (is_patch) + { + statement("if (gl_InvocationID == 0)"); + begin_scope(); + } + statement(to_expression(var.self), " = ", lut_name, ";"); + if (is_patch) + end_scope(); + }); + } +} + +void CompilerGLSL::emit_subgroup_arithmetic_workaround(const std::string &func, Op op, GroupOperation group_op) +{ + std::string result; + switch (group_op) + { + case GroupOperationReduce: + result = "reduction"; + break; + + case GroupOperationExclusiveScan: + result = "excl_scan"; + break; + + case GroupOperationInclusiveScan: + result = "incl_scan"; + break; + + default: + SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation"); + } + + struct TypeInfo + { + std::string type; + std::string identity; + }; + + std::vector<TypeInfo> type_infos; + switch (op) + { + case OpGroupNonUniformIAdd: + { + type_infos.emplace_back(TypeInfo{ "uint", "0u" }); + type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(0u)" }); + type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(0u)" }); + type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(0u)" }); + type_infos.emplace_back(TypeInfo{ "int", "0" }); + type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(0)" }); + type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(0)" }); + type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(0)" }); + break; + } + + case OpGroupNonUniformFAdd: + { + type_infos.emplace_back(TypeInfo{ "float", "0.0f" }); + type_infos.emplace_back(TypeInfo{ "vec2", "vec2(0.0f)" }); + type_infos.emplace_back(TypeInfo{ "vec3", "vec3(0.0f)" }); + type_infos.emplace_back(TypeInfo{ "vec4", "vec4(0.0f)" }); + // ARB_gpu_shader_fp64 is required in GL4.0 which in turn is required by NV_thread_shuffle + type_infos.emplace_back(TypeInfo{ "double", "0.0LF" }); + type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(0.0LF)" }); + type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(0.0LF)" }); + type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(0.0LF)" }); + break; + } + + case OpGroupNonUniformIMul: + { + type_infos.emplace_back(TypeInfo{ "uint", "1u" }); + type_infos.emplace_back(TypeInfo{ "uvec2", "uvec2(1u)" }); + type_infos.emplace_back(TypeInfo{ "uvec3", "uvec3(1u)" }); + type_infos.emplace_back(TypeInfo{ "uvec4", "uvec4(1u)" }); + type_infos.emplace_back(TypeInfo{ "int", "1" }); + type_infos.emplace_back(TypeInfo{ "ivec2", "ivec2(1)" }); + type_infos.emplace_back(TypeInfo{ "ivec3", "ivec3(1)" }); + type_infos.emplace_back(TypeInfo{ "ivec4", "ivec4(1)" }); + break; + } + + case OpGroupNonUniformFMul: + { + type_infos.emplace_back(TypeInfo{ "float", "1.0f" }); + type_infos.emplace_back(TypeInfo{ "vec2", "vec2(1.0f)" }); + type_infos.emplace_back(TypeInfo{ "vec3", "vec3(1.0f)" }); + type_infos.emplace_back(TypeInfo{ "vec4", "vec4(1.0f)" }); + type_infos.emplace_back(TypeInfo{ "double", "0.0LF" }); + type_infos.emplace_back(TypeInfo{ "dvec2", "dvec2(1.0LF)" }); + type_infos.emplace_back(TypeInfo{ "dvec3", "dvec3(1.0LF)" }); + type_infos.emplace_back(TypeInfo{ "dvec4", "dvec4(1.0LF)" }); + break; + } + + default: + SPIRV_CROSS_THROW("Unsupported workaround for arithmetic group operation"); + } + + const bool op_is_addition = op == OpGroupNonUniformIAdd || op == OpGroupNonUniformFAdd; + const bool op_is_multiplication = op == OpGroupNonUniformIMul || op == OpGroupNonUniformFMul; + std::string op_symbol; + if (op_is_addition) + { + op_symbol = "+="; + } + else if (op_is_multiplication) + { + op_symbol = "*="; + } + + for (const TypeInfo &t : type_infos) + { + statement(t.type, " ", func, "(", t.type, " v)"); + begin_scope(); + statement(t.type, " ", result, " = ", t.identity, ";"); + statement("uvec4 active_threads = subgroupBallot(true);"); + statement("if (subgroupBallotBitCount(active_threads) == gl_SubgroupSize)"); + begin_scope(); + statement("uint total = gl_SubgroupSize / 2u;"); + statement(result, " = v;"); + statement("for (uint i = 1u; i <= total; i <<= 1u)"); + begin_scope(); + statement("bool valid;"); + if (group_op == GroupOperationReduce) + { + statement(t.type, " s = shuffleXorNV(", result, ", i, gl_SubgroupSize, valid);"); + } + else if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan) + { + statement(t.type, " s = shuffleUpNV(", result, ", i, gl_SubgroupSize, valid);"); + } + if (op_is_addition || op_is_multiplication) + { + statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";"); + } + end_scope(); + if (group_op == GroupOperationExclusiveScan) + { + statement(result, " = shuffleUpNV(", result, ", 1u, gl_SubgroupSize);"); + statement("if (subgroupElect())"); + begin_scope(); + statement(result, " = ", t.identity, ";"); + end_scope(); + } + end_scope(); + statement("else"); + begin_scope(); + if (group_op == GroupOperationExclusiveScan) + { + statement("uint total = subgroupBallotBitCount(gl_SubgroupLtMask);"); + } + else if (group_op == GroupOperationInclusiveScan) + { + statement("uint total = subgroupBallotBitCount(gl_SubgroupLeMask);"); + } + statement("for (uint i = 0u; i < gl_SubgroupSize; ++i)"); + begin_scope(); + statement("bool valid = subgroupBallotBitExtract(active_threads, i);"); + statement(t.type, " s = shuffleNV(v, i, gl_SubgroupSize);"); + if (group_op == GroupOperationExclusiveScan || group_op == GroupOperationInclusiveScan) + { + statement("valid = valid && (i < total);"); + } + if (op_is_addition || op_is_multiplication) + { + statement(result, " ", op_symbol, " valid ? s : ", t.identity, ";"); + } + end_scope(); + end_scope(); + statement("return ", result, ";"); + end_scope(); + } +} + +void CompilerGLSL::emit_extension_workarounds(spv::ExecutionModel model) +{ + static const char *workaround_types[] = { "int", "ivec2", "ivec3", "ivec4", "uint", "uvec2", "uvec3", "uvec4", + "float", "vec2", "vec3", "vec4", "double", "dvec2", "dvec3", "dvec4" }; + + if (!options.vulkan_semantics) + { + using Supp = ShaderSubgroupSupportHelper; + auto result = shader_subgroup_supporter.resolve(); + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMask)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupMask, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupEqMask uvec4(gl_ThreadEqMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupGeMask uvec4(gl_ThreadGeMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupGtMask uvec4(gl_ThreadGtMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupLeMask uvec4(gl_ThreadLeMaskNV, 0u, 0u, 0u)"); + statement("#define gl_SubgroupLtMask uvec4(gl_ThreadLtMaskNV, 0u, 0u, 0u)"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupEqMask uvec4(unpackUint2x32(gl_SubGroupEqMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupGeMask uvec4(unpackUint2x32(gl_SubGroupGeMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupGtMask uvec4(unpackUint2x32(gl_SubGroupGtMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupLeMask uvec4(unpackUint2x32(gl_SubGroupLeMaskARB), 0u, 0u)"); + statement("#define gl_SubgroupLtMask uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), 0u, 0u)"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupSize)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupSize, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupSize gl_WarpSizeNV"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupSize gl_SubGroupSizeARB"); + break; + case Supp::AMD_gcn_shader: + statement("#define gl_SubgroupSize uint(gl_SIMDGroupSizeAMD)"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInvocationID)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupInvocationID, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupInvocationID gl_ThreadInWarpNV"); + break; + case Supp::ARB_shader_ballot: + statement("#define gl_SubgroupInvocationID gl_SubGroupInvocationARB"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupID)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupID, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_SubgroupID gl_WarpIDNV"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::NumSubgroups)) + { + auto exts = Supp::get_candidates_for_feature(Supp::NumSubgroups, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("#define gl_NumSubgroups gl_WarpsPerSMNV"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBroadcast_First)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBroadcast_First, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_shuffle: + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcastFirst(", t, + " value) { return shuffleNV(value, findLSB(ballotThreadNV(true)), gl_WarpSizeNV); }"); + } + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcast(", t, + " value, uint id) { return shuffleNV(value, id, gl_WarpSizeNV); }"); + } + break; + case Supp::ARB_shader_ballot: + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcastFirst(", t, + " value) { return readFirstInvocationARB(value); }"); + } + for (const char *t : workaround_types) + { + statement(t, " subgroupBroadcast(", t, + " value, uint id) { return readInvocationARB(value, id); }"); + } + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotFindLSB_MSB)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallotFindLSB_MSB, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("uint subgroupBallotFindLSB(uvec4 value) { return findLSB(value.x); }"); + statement("uint subgroupBallotFindMSB(uvec4 value) { return findMSB(value.x); }"); + break; + default: + break; + } + } + statement("#else"); + statement("uint subgroupBallotFindLSB(uvec4 value)"); + begin_scope(); + statement("int firstLive = findLSB(value.x);"); + statement("return uint(firstLive != -1 ? firstLive : (findLSB(value.y) + 32));"); + end_scope(); + statement("uint subgroupBallotFindMSB(uvec4 value)"); + begin_scope(); + statement("int firstLive = findMSB(value.y);"); + statement("return uint(firstLive != -1 ? (firstLive + 32) : findMSB(value.x));"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAll_Any_AllEqualBool)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupAll_Any_AllEqualBool, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_gpu_shader_5: + statement("bool subgroupAll(bool value) { return allThreadsNV(value); }"); + statement("bool subgroupAny(bool value) { return anyThreadNV(value); }"); + statement("bool subgroupAllEqual(bool value) { return allThreadsEqualNV(value); }"); + break; + case Supp::ARB_shader_group_vote: + statement("bool subgroupAll(bool v) { return allInvocationsARB(v); }"); + statement("bool subgroupAny(bool v) { return anyInvocationARB(v); }"); + statement("bool subgroupAllEqual(bool v) { return allInvocationsEqualARB(v); }"); + break; + case Supp::AMD_gcn_shader: + statement("bool subgroupAll(bool value) { return ballotAMD(value) == ballotAMD(true); }"); + statement("bool subgroupAny(bool value) { return ballotAMD(value) != 0ull; }"); + statement("bool subgroupAllEqual(bool value) { uint64_t b = ballotAMD(value); return b == 0ull || " + "b == ballotAMD(true); }"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupAllEqualT)) + { + statement("#ifndef GL_KHR_shader_subgroup_vote"); + statement( + "#define _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(type) bool subgroupAllEqual(type value) { return " + "subgroupAllEqual(subgroupBroadcastFirst(value) == value); }"); + for (const char *t : workaround_types) + statement("_SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND(", t, ")"); + statement("#undef _SPIRV_CROSS_SUBGROUP_ALL_EQUAL_WORKAROUND"); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallot)) + { + auto exts = Supp::get_candidates_for_feature(Supp::SubgroupBallot, result); + + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_group: + statement("uvec4 subgroupBallot(bool v) { return uvec4(ballotThreadNV(v), 0u, 0u, 0u); }"); + break; + case Supp::ARB_shader_ballot: + statement("uvec4 subgroupBallot(bool v) { return uvec4(unpackUint2x32(ballotARB(v)), 0u, 0u); }"); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupElect)) + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("bool subgroupElect()"); + begin_scope(); + statement("uvec4 activeMask = subgroupBallot(true);"); + statement("uint firstLive = subgroupBallotFindLSB(activeMask);"); + statement("return gl_SubgroupInvocationID == firstLive;"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBarrier)) + { + // Extensions we're using in place of GL_KHR_shader_subgroup_basic state + // that subgroup execute in lockstep so this barrier is implicit. + // However the GL 4.6 spec also states that `barrier` implies a shared memory barrier, + // and a specific test of optimizing scans by leveraging lock-step invocation execution, + // has shown that a `memoryBarrierShared` is needed in place of a `subgroupBarrier`. + // https://github.com/buildaworldnet/IrrlichtBAW/commit/d8536857991b89a30a6b65d29441e51b64c2c7ad#diff-9f898d27be1ea6fc79b03d9b361e299334c1a347b6e4dc344ee66110c6aa596aR19 + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupBarrier() { memoryBarrierShared(); }"); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupMemBarrier)) + { + if (model == spv::ExecutionModelGLCompute) + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupMemoryBarrier() { groupMemoryBarrier(); }"); + statement("void subgroupMemoryBarrierBuffer() { groupMemoryBarrier(); }"); + statement("void subgroupMemoryBarrierShared() { memoryBarrierShared(); }"); + statement("void subgroupMemoryBarrierImage() { groupMemoryBarrier(); }"); + statement("#endif"); + } + else + { + statement("#ifndef GL_KHR_shader_subgroup_basic"); + statement("void subgroupMemoryBarrier() { memoryBarrier(); }"); + statement("void subgroupMemoryBarrierBuffer() { memoryBarrierBuffer(); }"); + statement("void subgroupMemoryBarrierImage() { memoryBarrierImage(); }"); + statement("#endif"); + } + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupInverseBallot_InclBitCount_ExclBitCout)) + { + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("bool subgroupInverseBallot(uvec4 value)"); + begin_scope(); + statement("return any(notEqual(value.xy & gl_SubgroupEqMask.xy, uvec2(0u)));"); + end_scope(); + + statement("uint subgroupBallotInclusiveBitCount(uvec4 value)"); + begin_scope(); + statement("uvec2 v = value.xy & gl_SubgroupLeMask.xy;"); + statement("ivec2 c = bitCount(v);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + + statement("uint subgroupBallotExclusiveBitCount(uvec4 value)"); + begin_scope(); + statement("uvec2 v = value.xy & gl_SubgroupLtMask.xy;"); + statement("ivec2 c = bitCount(v);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitCount)) + { + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("uint subgroupBallotBitCount(uvec4 value)"); + begin_scope(); + statement("ivec2 c = bitCount(value.xy);"); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("return uint(c.x);"); + statement_no_indent("#else"); + statement("return uint(c.x + c.y);"); + statement_no_indent("#endif"); + end_scope(); + statement("#endif"); + statement(""); + } + + if (shader_subgroup_supporter.is_feature_requested(Supp::SubgroupBallotBitExtract)) + { + statement("#ifndef GL_KHR_shader_subgroup_ballot"); + statement("bool subgroupBallotBitExtract(uvec4 value, uint index)"); + begin_scope(); + statement_no_indent("#ifdef GL_NV_shader_thread_group"); + statement("uint shifted = value.x >> index;"); + statement_no_indent("#else"); + statement("uint shifted = value[index >> 5u] >> (index & 0x1fu);"); + statement_no_indent("#endif"); + statement("return (shifted & 1u) != 0u;"); + end_scope(); + statement("#endif"); + statement(""); + } + + auto arithmetic_feature_helper = + [&](Supp::Feature feat, std::string func_name, spv::Op op, spv::GroupOperation group_op) + { + if (shader_subgroup_supporter.is_feature_requested(feat)) + { + auto exts = Supp::get_candidates_for_feature(feat, result); + for (auto &e : exts) + { + const char *name = Supp::get_extension_name(e); + statement(&e == &exts.front() ? "#if" : "#elif", " defined(", name, ")"); + + switch (e) + { + case Supp::NV_shader_thread_shuffle: + emit_subgroup_arithmetic_workaround(func_name, op, group_op); + break; + default: + break; + } + } + statement("#endif"); + statement(""); + } + }; + + arithmetic_feature_helper(Supp::SubgroupArithmeticIAddReduce, "subgroupAdd", OpGroupNonUniformIAdd, + GroupOperationReduce); + arithmetic_feature_helper(Supp::SubgroupArithmeticIAddExclusiveScan, "subgroupExclusiveAdd", + OpGroupNonUniformIAdd, GroupOperationExclusiveScan); + arithmetic_feature_helper(Supp::SubgroupArithmeticIAddInclusiveScan, "subgroupInclusiveAdd", + OpGroupNonUniformIAdd, GroupOperationInclusiveScan); + arithmetic_feature_helper(Supp::SubgroupArithmeticFAddReduce, "subgroupAdd", OpGroupNonUniformFAdd, + GroupOperationReduce); + arithmetic_feature_helper(Supp::SubgroupArithmeticFAddExclusiveScan, "subgroupExclusiveAdd", + OpGroupNonUniformFAdd, GroupOperationExclusiveScan); + arithmetic_feature_helper(Supp::SubgroupArithmeticFAddInclusiveScan, "subgroupInclusiveAdd", + OpGroupNonUniformFAdd, GroupOperationInclusiveScan); + + arithmetic_feature_helper(Supp::SubgroupArithmeticIMulReduce, "subgroupMul", OpGroupNonUniformIMul, + GroupOperationReduce); + arithmetic_feature_helper(Supp::SubgroupArithmeticIMulExclusiveScan, "subgroupExclusiveMul", + OpGroupNonUniformIMul, GroupOperationExclusiveScan); + arithmetic_feature_helper(Supp::SubgroupArithmeticIMulInclusiveScan, "subgroupInclusiveMul", + OpGroupNonUniformIMul, GroupOperationInclusiveScan); + arithmetic_feature_helper(Supp::SubgroupArithmeticFMulReduce, "subgroupMul", OpGroupNonUniformFMul, + GroupOperationReduce); + arithmetic_feature_helper(Supp::SubgroupArithmeticFMulExclusiveScan, "subgroupExclusiveMul", + OpGroupNonUniformFMul, GroupOperationExclusiveScan); + arithmetic_feature_helper(Supp::SubgroupArithmeticFMulInclusiveScan, "subgroupInclusiveMul", + OpGroupNonUniformFMul, GroupOperationInclusiveScan); + } + + if (!workaround_ubo_load_overload_types.empty()) + { + for (auto &type_id : workaround_ubo_load_overload_types) + { + auto &type = get<SPIRType>(type_id); + + if (options.es && is_matrix(type)) + { + // Need both variants. + // GLSL cannot overload on precision, so need to dispatch appropriately. + statement("highp ", type_to_glsl(type), " spvWorkaroundRowMajor(highp ", type_to_glsl(type), " wrap) { return wrap; }"); + statement("mediump ", type_to_glsl(type), " spvWorkaroundRowMajorMP(mediump ", type_to_glsl(type), " wrap) { return wrap; }"); + } + else + { + statement(type_to_glsl(type), " spvWorkaroundRowMajor(", type_to_glsl(type), " wrap) { return wrap; }"); + } + } + statement(""); + } +} + +void CompilerGLSL::emit_polyfills(uint32_t polyfills, bool relaxed) +{ + const char *qual = ""; + const char *suffix = (options.es && relaxed) ? "MP" : ""; + if (options.es) + qual = relaxed ? "mediump " : "highp "; + + if (polyfills & PolyfillTranspose2x2) + { + statement(qual, "mat2 spvTranspose", suffix, "(", qual, "mat2 m)"); + begin_scope(); + statement("return mat2(m[0][0], m[1][0], m[0][1], m[1][1]);"); + end_scope(); + statement(""); + } + + if (polyfills & PolyfillTranspose3x3) + { + statement(qual, "mat3 spvTranspose", suffix, "(", qual, "mat3 m)"); + begin_scope(); + statement("return mat3(m[0][0], m[1][0], m[2][0], m[0][1], m[1][1], m[2][1], m[0][2], m[1][2], m[2][2]);"); + end_scope(); + statement(""); + } + + if (polyfills & PolyfillTranspose4x4) + { + statement(qual, "mat4 spvTranspose", suffix, "(", qual, "mat4 m)"); + begin_scope(); + statement("return mat4(m[0][0], m[1][0], m[2][0], m[3][0], m[0][1], m[1][1], m[2][1], m[3][1], m[0][2], " + "m[1][2], m[2][2], m[3][2], m[0][3], m[1][3], m[2][3], m[3][3]);"); + end_scope(); + statement(""); + } + + if (polyfills & PolyfillDeterminant2x2) + { + statement(qual, "float spvDeterminant", suffix, "(", qual, "mat2 m)"); + begin_scope(); + statement("return m[0][0] * m[1][1] - m[0][1] * m[1][0];"); + end_scope(); + statement(""); + } + + if (polyfills & PolyfillDeterminant3x3) + { + statement(qual, "float spvDeterminant", suffix, "(", qual, "mat3 m)"); + begin_scope(); + statement("return dot(m[0], vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], " + "m[1][2] * m[2][0] - m[1][0] * m[2][2], " + "m[1][0] * m[2][1] - m[1][1] * m[2][0]));"); + end_scope(); + statement(""); + } + + if (polyfills & PolyfillDeterminant4x4) + { + statement(qual, "float spvDeterminant", suffix, "(", qual, "mat4 m)"); + begin_scope(); + statement("return dot(m[0], vec4(" + "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], " + "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], " + "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], " + "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]));"); + end_scope(); + statement(""); + } + + if (polyfills & PolyfillMatrixInverse2x2) + { + statement(qual, "mat2 spvInverse", suffix, "(", qual, "mat2 m)"); + begin_scope(); + statement("return mat2(m[1][1], -m[0][1], -m[1][0], m[0][0]) " + "* (1.0 / (m[0][0] * m[1][1] - m[1][0] * m[0][1]));"); + end_scope(); + statement(""); + } + + if (polyfills & PolyfillMatrixInverse3x3) + { + statement(qual, "mat3 spvInverse", suffix, "(", qual, "mat3 m)"); + begin_scope(); + statement(qual, "vec3 t = vec3(m[1][1] * m[2][2] - m[1][2] * m[2][1], m[1][2] * m[2][0] - m[1][0] * m[2][2], m[1][0] * m[2][1] - m[1][1] * m[2][0]);"); + statement("return mat3(t[0], " + "m[0][2] * m[2][1] - m[0][1] * m[2][2], " + "m[0][1] * m[1][2] - m[0][2] * m[1][1], " + "t[1], " + "m[0][0] * m[2][2] - m[0][2] * m[2][0], " + "m[0][2] * m[1][0] - m[0][0] * m[1][2], " + "t[2], " + "m[0][1] * m[2][0] - m[0][0] * m[2][1], " + "m[0][0] * m[1][1] - m[0][1] * m[1][0]) " + "* (1.0 / dot(m[0], t));"); + end_scope(); + statement(""); + } + + if (polyfills & PolyfillMatrixInverse4x4) + { + statement(qual, "mat4 spvInverse", suffix, "(", qual, "mat4 m)"); + begin_scope(); + statement(qual, "vec4 t = vec4(" + "m[2][1] * m[3][2] * m[1][3] - m[3][1] * m[2][2] * m[1][3] + m[3][1] * m[1][2] * m[2][3] - m[1][1] * m[3][2] * m[2][3] - m[2][1] * m[1][2] * m[3][3] + m[1][1] * m[2][2] * m[3][3], " + "m[3][0] * m[2][2] * m[1][3] - m[2][0] * m[3][2] * m[1][3] - m[3][0] * m[1][2] * m[2][3] + m[1][0] * m[3][2] * m[2][3] + m[2][0] * m[1][2] * m[3][3] - m[1][0] * m[2][2] * m[3][3], " + "m[2][0] * m[3][1] * m[1][3] - m[3][0] * m[2][1] * m[1][3] + m[3][0] * m[1][1] * m[2][3] - m[1][0] * m[3][1] * m[2][3] - m[2][0] * m[1][1] * m[3][3] + m[1][0] * m[2][1] * m[3][3], " + "m[3][0] * m[2][1] * m[1][2] - m[2][0] * m[3][1] * m[1][2] - m[3][0] * m[1][1] * m[2][2] + m[1][0] * m[3][1] * m[2][2] + m[2][0] * m[1][1] * m[3][2] - m[1][0] * m[2][1] * m[3][2]);"); + statement("return mat4(" + "t[0], " + "m[3][1] * m[2][2] * m[0][3] - m[2][1] * m[3][2] * m[0][3] - m[3][1] * m[0][2] * m[2][3] + m[0][1] * m[3][2] * m[2][3] + m[2][1] * m[0][2] * m[3][3] - m[0][1] * m[2][2] * m[3][3], " + "m[1][1] * m[3][2] * m[0][3] - m[3][1] * m[1][2] * m[0][3] + m[3][1] * m[0][2] * m[1][3] - m[0][1] * m[3][2] * m[1][3] - m[1][1] * m[0][2] * m[3][3] + m[0][1] * m[1][2] * m[3][3], " + "m[2][1] * m[1][2] * m[0][3] - m[1][1] * m[2][2] * m[0][3] - m[2][1] * m[0][2] * m[1][3] + m[0][1] * m[2][2] * m[1][3] + m[1][1] * m[0][2] * m[2][3] - m[0][1] * m[1][2] * m[2][3], " + "t[1], " + "m[2][0] * m[3][2] * m[0][3] - m[3][0] * m[2][2] * m[0][3] + m[3][0] * m[0][2] * m[2][3] - m[0][0] * m[3][2] * m[2][3] - m[2][0] * m[0][2] * m[3][3] + m[0][0] * m[2][2] * m[3][3], " + "m[3][0] * m[1][2] * m[0][3] - m[1][0] * m[3][2] * m[0][3] - m[3][0] * m[0][2] * m[1][3] + m[0][0] * m[3][2] * m[1][3] + m[1][0] * m[0][2] * m[3][3] - m[0][0] * m[1][2] * m[3][3], " + "m[1][0] * m[2][2] * m[0][3] - m[2][0] * m[1][2] * m[0][3] + m[2][0] * m[0][2] * m[1][3] - m[0][0] * m[2][2] * m[1][3] - m[1][0] * m[0][2] * m[2][3] + m[0][0] * m[1][2] * m[2][3], " + "t[2], " + "m[3][0] * m[2][1] * m[0][3] - m[2][0] * m[3][1] * m[0][3] - m[3][0] * m[0][1] * m[2][3] + m[0][0] * m[3][1] * m[2][3] + m[2][0] * m[0][1] * m[3][3] - m[0][0] * m[2][1] * m[3][3], " + "m[1][0] * m[3][1] * m[0][3] - m[3][0] * m[1][1] * m[0][3] + m[3][0] * m[0][1] * m[1][3] - m[0][0] * m[3][1] * m[1][3] - m[1][0] * m[0][1] * m[3][3] + m[0][0] * m[1][1] * m[3][3], " + "m[2][0] * m[1][1] * m[0][3] - m[1][0] * m[2][1] * m[0][3] - m[2][0] * m[0][1] * m[1][3] + m[0][0] * m[2][1] * m[1][3] + m[1][0] * m[0][1] * m[2][3] - m[0][0] * m[1][1] * m[2][3], " + "t[3], " + "m[2][0] * m[3][1] * m[0][2] - m[3][0] * m[2][1] * m[0][2] + m[3][0] * m[0][1] * m[2][2] - m[0][0] * m[3][1] * m[2][2] - m[2][0] * m[0][1] * m[3][2] + m[0][0] * m[2][1] * m[3][2], " + "m[3][0] * m[1][1] * m[0][2] - m[1][0] * m[3][1] * m[0][2] - m[3][0] * m[0][1] * m[1][2] + m[0][0] * m[3][1] * m[1][2] + m[1][0] * m[0][1] * m[3][2] - m[0][0] * m[1][1] * m[3][2], " + "m[1][0] * m[2][1] * m[0][2] - m[2][0] * m[1][1] * m[0][2] + m[2][0] * m[0][1] * m[1][2] - m[0][0] * m[2][1] * m[1][2] - m[1][0] * m[0][1] * m[2][2] + m[0][0] * m[1][1] * m[2][2]) " + "* (1.0 / dot(m[0], t));"); + end_scope(); + statement(""); + } + + if (!relaxed) + { + static const Polyfill polys[3][3] = { + { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 }, + { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 }, + { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 }, + }; + + static const GLSLstd450 glsl_ops[] = { GLSLstd450NMin, GLSLstd450NMax, GLSLstd450NClamp }; + static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" }; + bool has_poly = false; + + for (uint32_t i = 0; i < 3; i++) + { + for (uint32_t j = 0; j < 3; j++) + { + if ((polyfills & polys[i][j]) == 0) + continue; + + const char *types[3][4] = { + { "float16_t", "f16vec2", "f16vec3", "f16vec4" }, + { "float", "vec2", "vec3", "vec4" }, + { "double", "dvec2", "dvec3", "dvec4" }, + }; + + for (uint32_t k = 0; k < 4; k++) + { + auto *type = types[j][k]; + + if (i < 2) + { + statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ", + type, " ", spv_ops[i], "(", type, ", ", type, ");"); + } + else + { + statement("spirv_instruction(set = \"GLSL.std.450\", id = ", glsl_ops[i], ") ", + type, " ", spv_ops[i], "(", type, ", ", type, ", ", type, ");"); + } + + has_poly = true; + } + } + } + + if (has_poly) + statement(""); + } + else + { + // Mediump intrinsics don't work correctly, so wrap the intrinsic in an outer shell that ensures mediump + // propagation. + + static const Polyfill polys[3][3] = { + { PolyfillNMin16, PolyfillNMin32, PolyfillNMin64 }, + { PolyfillNMax16, PolyfillNMax32, PolyfillNMax64 }, + { PolyfillNClamp16, PolyfillNClamp32, PolyfillNClamp64 }, + }; + + static const char *spv_ops[] = { "spvNMin", "spvNMax", "spvNClamp" }; + + for (uint32_t i = 0; i < 3; i++) + { + for (uint32_t j = 0; j < 3; j++) + { + if ((polyfills & polys[i][j]) == 0) + continue; + + const char *types[3][4] = { + { "float16_t", "f16vec2", "f16vec3", "f16vec4" }, + { "float", "vec2", "vec3", "vec4" }, + { "double", "dvec2", "dvec3", "dvec4" }, + }; + + for (uint32_t k = 0; k < 4; k++) + { + auto *type = types[j][k]; + + if (i < 2) + { + statement("mediump ", type, " ", spv_ops[i], "Relaxed(", + "mediump ", type, " a, mediump ", type, " b)"); + begin_scope(); + statement("mediump ", type, " res = ", spv_ops[i], "(a, b);"); + statement("return res;"); + end_scope(); + statement(""); + } + else + { + statement("mediump ", type, " ", spv_ops[i], "Relaxed(", + "mediump ", type, " a, mediump ", type, " b, mediump ", type, " c)"); + begin_scope(); + statement("mediump ", type, " res = ", spv_ops[i], "(a, b, c);"); + statement("return res;"); + end_scope(); + statement(""); + } + } + } + } + } +} + +// Returns a string representation of the ID, usable as a function arg. +// Default is to simply return the expression representation fo the arg ID. +// Subclasses may override to modify the return value. +string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id) +{ + // Make sure that we use the name of the original variable, and not the parameter alias. + uint32_t name_id = id; + auto *var = maybe_get<SPIRVariable>(id); + if (var && var->basevariable) + name_id = var->basevariable; + return to_expression(name_id); +} + +void CompilerGLSL::force_temporary_and_recompile(uint32_t id) +{ + auto res = forced_temporaries.insert(id); + + // Forcing new temporaries guarantees forward progress. + if (res.second) + force_recompile_guarantee_forward_progress(); + else + force_recompile(); +} + +uint32_t CompilerGLSL::consume_temporary_in_precision_context(uint32_t type_id, uint32_t id, Options::Precision precision) +{ + // Constants do not have innate precision. + auto handle_type = ir.ids[id].get_type(); + if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef) + return id; + + // Ignore anything that isn't 32-bit values. + auto &type = get<SPIRType>(type_id); + if (type.pointer) + return id; + if (type.basetype != SPIRType::Float && type.basetype != SPIRType::UInt && type.basetype != SPIRType::Int) + return id; + + if (precision == Options::DontCare) + { + // If precision is consumed as don't care (operations only consisting of constants), + // we need to bind the expression to a temporary, + // otherwise we have no way of controlling the precision later. + auto itr = forced_temporaries.insert(id); + if (itr.second) + force_recompile_guarantee_forward_progress(); + return id; + } + + auto current_precision = has_decoration(id, DecorationRelaxedPrecision) ? Options::Mediump : Options::Highp; + if (current_precision == precision) + return id; + + auto itr = temporary_to_mirror_precision_alias.find(id); + if (itr == temporary_to_mirror_precision_alias.end()) + { + uint32_t alias_id = ir.increase_bound_by(1); + auto &m = ir.meta[alias_id]; + if (auto *input_m = ir.find_meta(id)) + m = *input_m; + + const char *prefix; + if (precision == Options::Mediump) + { + set_decoration(alias_id, DecorationRelaxedPrecision); + prefix = "mp_copy_"; + } + else + { + unset_decoration(alias_id, DecorationRelaxedPrecision); + prefix = "hp_copy_"; + } + + auto alias_name = join(prefix, to_name(id)); + ParsedIR::sanitize_underscores(alias_name); + set_name(alias_id, alias_name); + + emit_op(type_id, alias_id, to_expression(id), true); + temporary_to_mirror_precision_alias[id] = alias_id; + forced_temporaries.insert(id); + forced_temporaries.insert(alias_id); + force_recompile_guarantee_forward_progress(); + id = alias_id; + } + else + { + id = itr->second; + } + + return id; +} + +void CompilerGLSL::handle_invalid_expression(uint32_t id) +{ + // We tried to read an invalidated expression. + // This means we need another pass at compilation, but next time, + // force temporary variables so that they cannot be invalidated. + force_temporary_and_recompile(id); + + // If the invalid expression happened as a result of a CompositeInsert + // overwrite, we must block this from happening next iteration. + if (composite_insert_overwritten.count(id)) + block_composite_insert_overwrite.insert(id); +} + +// Converts the format of the current expression from packed to unpacked, +// by wrapping the expression in a constructor of the appropriate type. +// GLSL does not support packed formats, so simply return the expression. +// Subclasses that do will override. +string CompilerGLSL::unpack_expression_type(string expr_str, const SPIRType &, uint32_t, bool, bool) +{ + return expr_str; +} + +// Sometimes we proactively enclosed an expression where it turns out we might have not needed it after all. +void CompilerGLSL::strip_enclosed_expression(string &expr) +{ + if (expr.size() < 2 || expr.front() != '(' || expr.back() != ')') + return; + + // Have to make sure that our first and last parens actually enclose everything inside it. + uint32_t paren_count = 0; + for (auto &c : expr) + { + if (c == '(') + paren_count++; + else if (c == ')') + { + paren_count--; + + // If we hit 0 and this is not the final char, our first and final parens actually don't + // enclose the expression, and we cannot strip, e.g.: (a + b) * (c + d). + if (paren_count == 0 && &c != &expr.back()) + return; + } + } + expr.erase(expr.size() - 1, 1); + expr.erase(begin(expr)); +} + +bool CompilerGLSL::needs_enclose_expression(const std::string &expr) +{ + bool need_parens = false; + + // If the expression starts with a unary we need to enclose to deal with cases where we have back-to-back + // unary expressions. + if (!expr.empty()) + { + auto c = expr.front(); + if (c == '-' || c == '+' || c == '!' || c == '~' || c == '&' || c == '*') + need_parens = true; + } + + if (!need_parens) + { + uint32_t paren_count = 0; + for (auto c : expr) + { + if (c == '(' || c == '[') + paren_count++; + else if (c == ')' || c == ']') + { + assert(paren_count); + paren_count--; + } + else if (c == ' ' && paren_count == 0) + { + need_parens = true; + break; + } + } + assert(paren_count == 0); + } + + return need_parens; +} + +string CompilerGLSL::enclose_expression(const string &expr) +{ + // If this expression contains any spaces which are not enclosed by parentheses, + // we need to enclose it so we can treat the whole string as an expression. + // This happens when two expressions have been part of a binary op earlier. + if (needs_enclose_expression(expr)) + return join('(', expr, ')'); + else + return expr; +} + +string CompilerGLSL::dereference_expression(const SPIRType &expr_type, const std::string &expr) +{ + // If this expression starts with an address-of operator ('&'), then + // just return the part after the operator. + // TODO: Strip parens if unnecessary? + if (expr.front() == '&') + return expr.substr(1); + else if (backend.native_pointers) + return join('*', expr); + else if (is_physical_pointer(expr_type) && !is_physical_pointer_to_buffer_block(expr_type)) + return join(enclose_expression(expr), ".value"); + else + return expr; +} + +string CompilerGLSL::address_of_expression(const std::string &expr) +{ + if (expr.size() > 3 && expr[0] == '(' && expr[1] == '*' && expr.back() == ')') + { + // If we have an expression which looks like (*foo), taking the address of it is the same as stripping + // the first two and last characters. We might have to enclose the expression. + // This doesn't work for cases like (*foo + 10), + // but this is an r-value expression which we cannot take the address of anyways. + return enclose_expression(expr.substr(2, expr.size() - 3)); + } + else if (expr.front() == '*') + { + // If this expression starts with a dereference operator ('*'), then + // just return the part after the operator. + return expr.substr(1); + } + else + return join('&', enclose_expression(expr)); +} + +// Just like to_expression except that we enclose the expression inside parentheses if needed. +string CompilerGLSL::to_enclosed_expression(uint32_t id, bool register_expression_read) +{ + return enclose_expression(to_expression(id, register_expression_read)); +} + +// Used explicitly when we want to read a row-major expression, but without any transpose shenanigans. +// need_transpose must be forced to false. +string CompilerGLSL::to_unpacked_row_major_matrix_expression(uint32_t id) +{ + return unpack_expression_type(to_expression(id), expression_type(id), + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), true); +} + +string CompilerGLSL::to_unpacked_expression(uint32_t id, bool register_expression_read) +{ + // If we need to transpose, it will also take care of unpacking rules. + auto *e = maybe_get<SPIRExpression>(id); + bool need_transpose = e && e->need_transpose; + bool is_remapped = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + + if (!need_transpose && (is_remapped || is_packed)) + { + return unpack_expression_type(to_expression(id, register_expression_read), + get_pointee_type(expression_type_id(id)), + get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID), + has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked), false); + } + else + return to_expression(id, register_expression_read); +} + +string CompilerGLSL::to_enclosed_unpacked_expression(uint32_t id, bool register_expression_read) +{ + return enclose_expression(to_unpacked_expression(id, register_expression_read)); +} + +string CompilerGLSL::to_dereferenced_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + + if (is_pointer(type) && should_dereference(id)) + return dereference_expression(type, to_enclosed_expression(id, register_expression_read)); + else + return to_expression(id, register_expression_read); +} + +string CompilerGLSL::to_pointer_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if (is_pointer(type) && expression_is_lvalue(id) && !should_dereference(id)) + return address_of_expression(to_enclosed_expression(id, register_expression_read)); + else + return to_unpacked_expression(id, register_expression_read); +} + +string CompilerGLSL::to_enclosed_pointer_expression(uint32_t id, bool register_expression_read) +{ + auto &type = expression_type(id); + if (is_pointer(type) && expression_is_lvalue(id) && !should_dereference(id)) + return address_of_expression(to_enclosed_expression(id, register_expression_read)); + else + return to_enclosed_unpacked_expression(id, register_expression_read); +} + +string CompilerGLSL::to_extract_component_expression(uint32_t id, uint32_t index) +{ + auto expr = to_enclosed_expression(id); + if (has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked)) + return join(expr, "[", index, "]"); + else + return join(expr, ".", index_to_swizzle(index)); +} + +string CompilerGLSL::to_extract_constant_composite_expression(uint32_t result_type, const SPIRConstant &c, + const uint32_t *chain, uint32_t length) +{ + // It is kinda silly if application actually enter this path since they know the constant up front. + // It is useful here to extract the plain constant directly. + SPIRConstant tmp; + tmp.constant_type = result_type; + auto &composite_type = get<SPIRType>(c.constant_type); + assert(composite_type.basetype != SPIRType::Struct && composite_type.array.empty()); + assert(!c.specialization); + + if (is_matrix(composite_type)) + { + if (length == 2) + { + tmp.m.c[0].vecsize = 1; + tmp.m.columns = 1; + tmp.m.c[0].r[0] = c.m.c[chain[0]].r[chain[1]]; + } + else + { + assert(length == 1); + tmp.m.c[0].vecsize = composite_type.vecsize; + tmp.m.columns = 1; + tmp.m.c[0] = c.m.c[chain[0]]; + } + } + else + { + assert(length == 1); + tmp.m.c[0].vecsize = 1; + tmp.m.columns = 1; + tmp.m.c[0].r[0] = c.m.c[0].r[chain[0]]; + } + + return constant_expression(tmp); +} + +string CompilerGLSL::to_rerolled_array_expression(const SPIRType &parent_type, + const string &base_expr, const SPIRType &type) +{ + bool remapped_boolean = parent_type.basetype == SPIRType::Struct && + type.basetype == SPIRType::Boolean && + backend.boolean_in_struct_remapped_type != SPIRType::Boolean; + + SPIRType tmp_type { OpNop }; + if (remapped_boolean) + { + tmp_type = get<SPIRType>(type.parent_type); + tmp_type.basetype = backend.boolean_in_struct_remapped_type; + } + else if (type.basetype == SPIRType::Boolean && backend.boolean_in_struct_remapped_type != SPIRType::Boolean) + { + // It's possible that we have an r-value expression that was OpLoaded from a struct. + // We have to reroll this and explicitly cast the input to bool, because the r-value is short. + tmp_type = get<SPIRType>(type.parent_type); + remapped_boolean = true; + } + + uint32_t size = to_array_size_literal(type); + auto &parent = get<SPIRType>(type.parent_type); + string expr = "{ "; + + for (uint32_t i = 0; i < size; i++) + { + auto subexpr = join(base_expr, "[", convert_to_string(i), "]"); + if (!is_array(parent)) + { + if (remapped_boolean) + subexpr = join(type_to_glsl(tmp_type), "(", subexpr, ")"); + expr += subexpr; + } + else + expr += to_rerolled_array_expression(parent_type, subexpr, parent); + + if (i + 1 < size) + expr += ", "; + } + + expr += " }"; + return expr; +} + +string CompilerGLSL::to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type) +{ + auto &type = expression_type(id); + + bool reroll_array = false; + bool remapped_boolean = parent_type.basetype == SPIRType::Struct && + type.basetype == SPIRType::Boolean && + backend.boolean_in_struct_remapped_type != SPIRType::Boolean; + + if (is_array(type)) + { + reroll_array = !backend.array_is_value_type || + (block_like_type && !backend.array_is_value_type_in_buffer_blocks); + + if (remapped_boolean) + { + // Forced to reroll if we have to change bool[] to short[]. + reroll_array = true; + } + } + + if (reroll_array) + { + // For this case, we need to "re-roll" an array initializer from a temporary. + // We cannot simply pass the array directly, since it decays to a pointer and it cannot + // participate in a struct initializer. E.g. + // float arr[2] = { 1.0, 2.0 }; + // Foo foo = { arr }; must be transformed to + // Foo foo = { { arr[0], arr[1] } }; + // The array sizes cannot be deduced from specialization constants since we cannot use any loops. + + // We're only triggering one read of the array expression, but this is fine since arrays have to be declared + // as temporaries anyways. + return to_rerolled_array_expression(parent_type, to_enclosed_expression(id), type); + } + else + { + auto expr = to_unpacked_expression(id); + if (remapped_boolean) + { + auto tmp_type = type; + tmp_type.basetype = backend.boolean_in_struct_remapped_type; + expr = join(type_to_glsl(tmp_type), "(", expr, ")"); + } + + return expr; + } +} + +string CompilerGLSL::to_non_uniform_aware_expression(uint32_t id) +{ + string expr = to_expression(id); + + if (has_decoration(id, DecorationNonUniform)) + convert_non_uniform_expression(expr, id); + + return expr; +} + +string CompilerGLSL::to_expression(uint32_t id, bool register_expression_read) +{ + auto itr = invalid_expressions.find(id); + if (itr != end(invalid_expressions)) + handle_invalid_expression(id); + + if (ir.ids[id].get_type() == TypeExpression) + { + // We might have a more complex chain of dependencies. + // A possible scenario is that we + // + // %1 = OpLoad + // %2 = OpDoSomething %1 %1. here %2 will have a dependency on %1. + // %3 = OpDoSomethingAgain %2 %2. Here %3 will lose the link to %1 since we don't propagate the dependencies like that. + // OpStore %1 %foo // Here we can invalidate %1, and hence all expressions which depend on %1. Only %2 will know since it's part of invalid_expressions. + // %4 = OpDoSomethingAnotherTime %3 %3 // If we forward all expressions we will see %1 expression after store, not before. + // + // However, we can propagate up a list of depended expressions when we used %2, so we can check if %2 is invalid when reading %3 after the store, + // and see that we should not forward reads of the original variable. + auto &expr = get<SPIRExpression>(id); + for (uint32_t dep : expr.expression_dependencies) + if (invalid_expressions.find(dep) != end(invalid_expressions)) + handle_invalid_expression(dep); + } + + if (register_expression_read) + track_expression_read(id); + + switch (ir.ids[id].get_type()) + { + case TypeExpression: + { + auto &e = get<SPIRExpression>(id); + if (e.base_expression) + return to_enclosed_expression(e.base_expression) + e.expression; + else if (e.need_transpose) + { + // This should not be reached for access chains, since we always deal explicitly with transpose state + // when consuming an access chain expression. + uint32_t physical_type_id = get_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + bool is_packed = has_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + bool relaxed = has_decoration(id, DecorationRelaxedPrecision); + return convert_row_major_matrix(e.expression, get<SPIRType>(e.expression_type), physical_type_id, + is_packed, relaxed); + } + else if (flattened_structs.count(id)) + { + return load_flattened_struct(e.expression, get<SPIRType>(e.expression_type)); + } + else + { + if (is_forcing_recompilation()) + { + // During first compilation phase, certain expression patterns can trigger exponential growth of memory. + // Avoid this by returning dummy expressions during this phase. + // Do not use empty expressions here, because those are sentinels for other cases. + return "_"; + } + else + return e.expression; + } + } + + case TypeConstant: + { + auto &c = get<SPIRConstant>(id); + auto &type = get<SPIRType>(c.constant_type); + + // WorkGroupSize may be a constant. + if (has_decoration(c.self, DecorationBuiltIn)) + return builtin_to_glsl(BuiltIn(get_decoration(c.self, DecorationBuiltIn)), StorageClassGeneric); + else if (c.specialization) + { + if (backend.workgroup_size_is_hidden) + { + int wg_index = get_constant_mapping_to_workgroup_component(c); + if (wg_index >= 0) + { + auto wg_size = join(builtin_to_glsl(BuiltInWorkgroupSize, StorageClassInput), vector_swizzle(1, wg_index)); + if (type.basetype != SPIRType::UInt) + wg_size = bitcast_expression(type, SPIRType::UInt, wg_size); + return wg_size; + } + } + + if (expression_is_forwarded(id)) + return constant_expression(c); + + return to_name(id); + } + else if (c.is_used_as_lut) + return to_name(id); + else if (type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) + return to_name(id); + else if (!type.array.empty() && !backend.can_declare_arrays_inline) + return to_name(id); + else + return constant_expression(c); + } + + case TypeConstantOp: + return to_name(id); + + case TypeVariable: + { + auto &var = get<SPIRVariable>(id); + // If we try to use a loop variable before the loop header, we have to redirect it to the static expression, + // the variable has not been declared yet. + if (var.statically_assigned || (var.loop_variable && !var.loop_variable_enable)) + { + // We might try to load from a loop variable before it has been initialized. + // Prefer static expression and fallback to initializer. + if (var.static_expression) + return to_expression(var.static_expression); + else if (var.initializer) + return to_expression(var.initializer); + else + { + // We cannot declare the variable yet, so have to fake it. + uint32_t undef_id = ir.increase_bound_by(1); + return emit_uninitialized_temporary_expression(get_variable_data_type_id(var), undef_id).expression; + } + } + else if (var.deferred_declaration) + { + var.deferred_declaration = false; + return variable_decl(var); + } + else if (flattened_structs.count(id)) + { + return load_flattened_struct(to_name(id), get<SPIRType>(var.basetype)); + } + else + { + auto &dec = ir.meta[var.self].decoration; + if (dec.builtin) + return builtin_to_glsl(dec.builtin_type, var.storage); + else + return to_name(id); + } + } + + case TypeCombinedImageSampler: + // This type should never be taken the expression of directly. + // The intention is that texture sampling functions will extract the image and samplers + // separately and take their expressions as needed. + // GLSL does not use this type because OpSampledImage immediately creates a combined image sampler + // expression ala sampler2D(texture, sampler). + SPIRV_CROSS_THROW("Combined image samplers have no default expression representation."); + + case TypeAccessChain: + // We cannot express this type. They only have meaning in other OpAccessChains, OpStore or OpLoad. + SPIRV_CROSS_THROW("Access chains have no default expression representation."); + + default: + return to_name(id); + } +} + +SmallVector<ConstantID> CompilerGLSL::get_composite_constant_ids(ConstantID const_id) +{ + if (auto *constant = maybe_get<SPIRConstant>(const_id)) + { + const auto &type = get<SPIRType>(constant->constant_type); + if (is_array(type) || type.basetype == SPIRType::Struct) + return constant->subconstants; + if (is_matrix(type)) + return SmallVector<ConstantID>(constant->m.id); + if (is_vector(type)) + return SmallVector<ConstantID>(constant->m.c[0].id); + SPIRV_CROSS_THROW("Unexpected scalar constant!"); + } + if (!const_composite_insert_ids.count(const_id)) + SPIRV_CROSS_THROW("Unimplemented for this OpSpecConstantOp!"); + return const_composite_insert_ids[const_id]; +} + +void CompilerGLSL::fill_composite_constant(SPIRConstant &constant, TypeID type_id, + const SmallVector<ConstantID> &initializers) +{ + auto &type = get<SPIRType>(type_id); + constant.specialization = true; + if (is_array(type) || type.basetype == SPIRType::Struct) + { + constant.subconstants = initializers; + } + else if (is_matrix(type)) + { + constant.m.columns = type.columns; + for (uint32_t i = 0; i < type.columns; ++i) + { + constant.m.id[i] = initializers[i]; + constant.m.c[i].vecsize = type.vecsize; + } + } + else if (is_vector(type)) + { + constant.m.c[0].vecsize = type.vecsize; + for (uint32_t i = 0; i < type.vecsize; ++i) + constant.m.c[0].id[i] = initializers[i]; + } + else + SPIRV_CROSS_THROW("Unexpected scalar in SpecConstantOp CompositeInsert!"); +} + +void CompilerGLSL::set_composite_constant(ConstantID const_id, TypeID type_id, + const SmallVector<ConstantID> &initializers) +{ + if (maybe_get<SPIRConstantOp>(const_id)) + { + const_composite_insert_ids[const_id] = initializers; + return; + } + + auto &constant = set<SPIRConstant>(const_id, type_id); + fill_composite_constant(constant, type_id, initializers); + forwarded_temporaries.insert(const_id); +} + +TypeID CompilerGLSL::get_composite_member_type(TypeID type_id, uint32_t member_idx) +{ + auto &type = get<SPIRType>(type_id); + if (is_array(type)) + return type.parent_type; + if (type.basetype == SPIRType::Struct) + return type.member_types[member_idx]; + if (is_matrix(type)) + return type.parent_type; + if (is_vector(type)) + return type.parent_type; + SPIRV_CROSS_THROW("Shouldn't reach lower than vector handling OpSpecConstantOp CompositeInsert!"); +} + +string CompilerGLSL::constant_op_expression(const SPIRConstantOp &cop) +{ + auto &type = get<SPIRType>(cop.basetype); + bool binary = false; + bool unary = false; + string op; + + if (is_legacy() && is_unsigned_opcode(cop.opcode)) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy targets."); + + // TODO: Find a clean way to reuse emit_instruction. + switch (cop.opcode) + { + case OpSConvert: + case OpUConvert: + case OpFConvert: + op = type_to_glsl_constructor(type); + break; + +#define GLSL_BOP(opname, x) \ + case Op##opname: \ + binary = true; \ + op = x; \ + break + +#define GLSL_UOP(opname, x) \ + case Op##opname: \ + unary = true; \ + op = x; \ + break + + GLSL_UOP(SNegate, "-"); + GLSL_UOP(Not, "~"); + GLSL_BOP(IAdd, "+"); + GLSL_BOP(ISub, "-"); + GLSL_BOP(IMul, "*"); + GLSL_BOP(SDiv, "/"); + GLSL_BOP(UDiv, "/"); + GLSL_BOP(UMod, "%"); + GLSL_BOP(SMod, "%"); + GLSL_BOP(ShiftRightLogical, ">>"); + GLSL_BOP(ShiftRightArithmetic, ">>"); + GLSL_BOP(ShiftLeftLogical, "<<"); + GLSL_BOP(BitwiseOr, "|"); + GLSL_BOP(BitwiseXor, "^"); + GLSL_BOP(BitwiseAnd, "&"); + GLSL_BOP(LogicalOr, "||"); + GLSL_BOP(LogicalAnd, "&&"); + GLSL_UOP(LogicalNot, "!"); + GLSL_BOP(LogicalEqual, "=="); + GLSL_BOP(LogicalNotEqual, "!="); + GLSL_BOP(IEqual, "=="); + GLSL_BOP(INotEqual, "!="); + GLSL_BOP(ULessThan, "<"); + GLSL_BOP(SLessThan, "<"); + GLSL_BOP(ULessThanEqual, "<="); + GLSL_BOP(SLessThanEqual, "<="); + GLSL_BOP(UGreaterThan, ">"); + GLSL_BOP(SGreaterThan, ">"); + GLSL_BOP(UGreaterThanEqual, ">="); + GLSL_BOP(SGreaterThanEqual, ">="); + + case OpSRem: + { + uint32_t op0 = cop.arguments[0]; + uint32_t op1 = cop.arguments[1]; + return join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + } + + case OpSelect: + { + if (cop.arguments.size() < 3) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + // This one is pretty annoying. It's triggered from + // uint(bool), int(bool) from spec constants. + // In order to preserve its compile-time constness in Vulkan GLSL, + // we need to reduce the OpSelect expression back to this simplified model. + // If we cannot, fail. + if (to_trivial_mix_op(type, op, cop.arguments[2], cop.arguments[1], cop.arguments[0])) + { + // Implement as a simple cast down below. + } + else + { + // Implement a ternary and pray the compiler understands it :) + return to_ternary_expression(type, cop.arguments[0], cop.arguments[1], cop.arguments[2]); + } + break; + } + + case OpVectorShuffle: + { + string expr = type_to_glsl_constructor(type); + expr += "("; + + uint32_t left_components = expression_type(cop.arguments[0]).vecsize; + string left_arg = to_enclosed_expression(cop.arguments[0]); + string right_arg = to_enclosed_expression(cop.arguments[1]); + + for (uint32_t i = 2; i < uint32_t(cop.arguments.size()); i++) + { + uint32_t index = cop.arguments[i]; + if (index == 0xFFFFFFFF) + { + SPIRConstant c; + c.constant_type = type.parent_type; + assert(type.parent_type != ID(0)); + expr += constant_expression(c); + } + else if (index >= left_components) + { + expr += right_arg + "." + "xyzw"[index - left_components]; + } + else + { + expr += left_arg + "." + "xyzw"[index]; + } + + if (i + 1 < uint32_t(cop.arguments.size())) + expr += ", "; + } + + expr += ")"; + return expr; + } + + case OpCompositeExtract: + { + auto expr = access_chain_internal(cop.arguments[0], &cop.arguments[1], uint32_t(cop.arguments.size() - 1), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + return expr; + } + + case OpCompositeInsert: + { + SmallVector<ConstantID> new_init = get_composite_constant_ids(cop.arguments[1]); + uint32_t idx; + uint32_t target_id = cop.self; + uint32_t target_type_id = cop.basetype; + // We have to drill down to the part we want to modify, and create new + // constants for each containing part. + for (idx = 2; idx < cop.arguments.size() - 1; ++idx) + { + uint32_t new_const = ir.increase_bound_by(1); + uint32_t old_const = new_init[cop.arguments[idx]]; + new_init[cop.arguments[idx]] = new_const; + set_composite_constant(target_id, target_type_id, new_init); + new_init = get_composite_constant_ids(old_const); + target_id = new_const; + target_type_id = get_composite_member_type(target_type_id, cop.arguments[idx]); + } + // Now replace the initializer with the one from this instruction. + new_init[cop.arguments[idx]] = cop.arguments[0]; + set_composite_constant(target_id, target_type_id, new_init); + SPIRConstant tmp_const(cop.basetype); + fill_composite_constant(tmp_const, cop.basetype, const_composite_insert_ids[cop.self]); + return constant_expression(tmp_const); + } + + default: + // Some opcodes are unimplemented here, these are currently not possible to test from glslang. + SPIRV_CROSS_THROW("Unimplemented spec constant op."); + } + + uint32_t bit_width = 0; + if (unary || binary || cop.opcode == OpSConvert || cop.opcode == OpUConvert) + bit_width = expression_type(cop.arguments[0]).width; + + SPIRType::BaseType input_type; + bool skip_cast_if_equal_type = opcode_is_sign_invariant(cop.opcode); + + switch (cop.opcode) + { + case OpIEqual: + case OpINotEqual: + input_type = to_signed_basetype(bit_width); + break; + + case OpSLessThan: + case OpSLessThanEqual: + case OpSGreaterThan: + case OpSGreaterThanEqual: + case OpSMod: + case OpSDiv: + case OpShiftRightArithmetic: + case OpSConvert: + case OpSNegate: + input_type = to_signed_basetype(bit_width); + break; + + case OpULessThan: + case OpULessThanEqual: + case OpUGreaterThan: + case OpUGreaterThanEqual: + case OpUMod: + case OpUDiv: + case OpShiftRightLogical: + case OpUConvert: + input_type = to_unsigned_basetype(bit_width); + break; + + default: + input_type = type.basetype; + break; + } + +#undef GLSL_BOP +#undef GLSL_UOP + if (binary) + { + if (cop.arguments.size() < 2) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + string cast_op0; + string cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, cop.arguments[0], + cop.arguments[1], skip_cast_if_equal_type); + + if (type.basetype != input_type && type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + auto expr = bitcast_glsl_op(type, expected_type); + expr += '('; + expr += join(cast_op0, " ", op, " ", cast_op1); + expr += ')'; + return expr; + } + else + return join("(", cast_op0, " ", op, " ", cast_op1, ")"); + } + else if (unary) + { + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + // Auto-bitcast to result type as needed. + // Works around various casting scenarios in glslang as there is no OpBitcast for specialization constants. + return join("(", op, bitcast_glsl(type, cop.arguments[0]), ")"); + } + else if (cop.opcode == OpSConvert || cop.opcode == OpUConvert) + { + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + + auto &arg_type = expression_type(cop.arguments[0]); + if (arg_type.width < type.width && input_type != arg_type.basetype) + { + auto expected = arg_type; + expected.basetype = input_type; + return join(op, "(", bitcast_glsl(expected, cop.arguments[0]), ")"); + } + else + return join(op, "(", to_expression(cop.arguments[0]), ")"); + } + else + { + if (cop.arguments.size() < 1) + SPIRV_CROSS_THROW("Not enough arguments to OpSpecConstantOp."); + return join(op, "(", to_expression(cop.arguments[0]), ")"); + } +} + +string CompilerGLSL::constant_expression(const SPIRConstant &c, + bool inside_block_like_struct_scope, + bool inside_struct_scope) +{ + auto &type = get<SPIRType>(c.constant_type); + + if (is_pointer(type)) + { + return backend.null_pointer_literal; + } + else if (!c.subconstants.empty()) + { + // Handles Arrays and structures. + string res; + + // Only consider the decay if we are inside a struct scope where we are emitting a member with Offset decoration. + // Outside a block-like struct declaration, we can always bind to a constant array with templated type. + // Should look at ArrayStride here as well, but it's possible to declare a constant struct + // with Offset = 0, using no ArrayStride on the enclosed array type. + // A particular CTS test hits this scenario. + bool array_type_decays = inside_block_like_struct_scope && + is_array(type) && + !backend.array_is_value_type_in_buffer_blocks; + + // Allow Metal to use the array<T> template to make arrays a value type + bool needs_trailing_tracket = false; + if (backend.use_initializer_list && backend.use_typed_initializer_list && type.basetype == SPIRType::Struct && + !is_array(type)) + { + res = type_to_glsl_constructor(type) + "{ "; + } + else if (backend.use_initializer_list && backend.use_typed_initializer_list && backend.array_is_value_type && + is_array(type) && !array_type_decays) + { + const auto *p_type = &type; + SPIRType tmp_type { OpNop }; + + if (inside_struct_scope && + backend.boolean_in_struct_remapped_type != SPIRType::Boolean && + type.basetype == SPIRType::Boolean) + { + tmp_type = type; + tmp_type.basetype = backend.boolean_in_struct_remapped_type; + p_type = &tmp_type; + } + + res = type_to_glsl_constructor(*p_type) + "({ "; + needs_trailing_tracket = true; + } + else if (backend.use_initializer_list) + { + res = "{ "; + } + else + { + res = type_to_glsl_constructor(type) + "("; + } + + uint32_t subconstant_index = 0; + for (auto &elem : c.subconstants) + { + if (auto *op = maybe_get<SPIRConstantOp>(elem)) + { + res += constant_op_expression(*op); + } + else if (maybe_get<SPIRUndef>(elem) != nullptr) + { + res += to_name(elem); + } + else + { + auto &subc = get<SPIRConstant>(elem); + if (subc.specialization && !expression_is_forwarded(elem)) + res += to_name(elem); + else + { + if (!is_array(type) && type.basetype == SPIRType::Struct) + { + // When we get down to emitting struct members, override the block-like information. + // For constants, we can freely mix and match block-like state. + inside_block_like_struct_scope = + has_member_decoration(type.self, subconstant_index, DecorationOffset); + } + + if (type.basetype == SPIRType::Struct) + inside_struct_scope = true; + + res += constant_expression(subc, inside_block_like_struct_scope, inside_struct_scope); + } + } + + if (&elem != &c.subconstants.back()) + res += ", "; + + subconstant_index++; + } + + res += backend.use_initializer_list ? " }" : ")"; + if (needs_trailing_tracket) + res += ")"; + + return res; + } + else if (type.basetype == SPIRType::Struct && type.member_types.size() == 0) + { + // Metal tessellation likes empty structs which are then constant expressions. + if (backend.supports_empty_struct) + return "{ }"; + else if (backend.use_typed_initializer_list) + return join(type_to_glsl(type), "{ 0 }"); + else if (backend.use_initializer_list) + return "{ 0 }"; + else + return join(type_to_glsl(type), "(0)"); + } + else if (c.columns() == 1) + { + auto res = constant_expression_vector(c, 0); + + if (inside_struct_scope && + backend.boolean_in_struct_remapped_type != SPIRType::Boolean && + type.basetype == SPIRType::Boolean) + { + SPIRType tmp_type = type; + tmp_type.basetype = backend.boolean_in_struct_remapped_type; + res = join(type_to_glsl(tmp_type), "(", res, ")"); + } + + return res; + } + else + { + string res = type_to_glsl(type) + "("; + for (uint32_t col = 0; col < c.columns(); col++) + { + if (c.specialization_constant_id(col) != 0) + res += to_name(c.specialization_constant_id(col)); + else + res += constant_expression_vector(c, col); + + if (col + 1 < c.columns()) + res += ", "; + } + res += ")"; + + if (inside_struct_scope && + backend.boolean_in_struct_remapped_type != SPIRType::Boolean && + type.basetype == SPIRType::Boolean) + { + SPIRType tmp_type = type; + tmp_type.basetype = backend.boolean_in_struct_remapped_type; + res = join(type_to_glsl(tmp_type), "(", res, ")"); + } + + return res; + } +} + +#ifdef _MSC_VER +// snprintf does not exist or is buggy on older MSVC versions, some of them +// being used by MinGW. Use sprintf instead and disable corresponding warning. +#pragma warning(push) +#pragma warning(disable : 4996) +#endif + +string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + float float_value = c.scalar_f16(col, row); + + // There is no literal "hf" in GL_NV_gpu_shader5, so to avoid lots + // of complicated workarounds, just value-cast to the half type always. + if (std::isnan(float_value) || std::isinf(float_value)) + { + SPIRType type { OpTypeFloat }; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + + if (float_value == numeric_limits<float>::infinity()) + res = join(type_to_glsl(type), "(1.0 / 0.0)"); + else if (float_value == -numeric_limits<float>::infinity()) + res = join(type_to_glsl(type), "(-1.0 / 0.0)"); + else if (std::isnan(float_value)) + res = join(type_to_glsl(type), "(0.0 / 0.0)"); + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } + else + { + SPIRType type { OpTypeFloat }; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + res = join(type_to_glsl(type), "(", format_float(float_value), ")"); + } + + return res; +} + +string CompilerGLSL::convert_float_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + float float_value = c.scalar_f32(col, row); + + if (std::isnan(float_value) || std::isinf(float_value)) + { + // Use special representation. + if (!is_legacy()) + { + SPIRType out_type { OpTypeFloat }; + SPIRType in_type { OpTypeInt }; + out_type.basetype = SPIRType::Float; + in_type.basetype = SPIRType::UInt; + out_type.vecsize = 1; + in_type.vecsize = 1; + out_type.width = 32; + in_type.width = 32; + + char print_buffer[32]; +#ifdef _WIN32 + sprintf(print_buffer, "0x%xu", c.scalar(col, row)); +#else + snprintf(print_buffer, sizeof(print_buffer), "0x%xu", c.scalar(col, row)); +#endif + + const char *comment = "inf"; + if (float_value == -numeric_limits<float>::infinity()) + comment = "-inf"; + else if (std::isnan(float_value)) + comment = "nan"; + res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)"); + } + else + { + if (float_value == numeric_limits<float>::infinity()) + { + if (backend.float_literal_suffix) + res = "(1.0f / 0.0f)"; + else + res = "(1.0 / 0.0)"; + } + else if (float_value == -numeric_limits<float>::infinity()) + { + if (backend.float_literal_suffix) + res = "(-1.0f / 0.0f)"; + else + res = "(-1.0 / 0.0)"; + } + else if (std::isnan(float_value)) + { + if (backend.float_literal_suffix) + res = "(0.0f / 0.0f)"; + else + res = "(0.0 / 0.0)"; + } + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } + } + else + { + res = format_float(float_value); + if (backend.float_literal_suffix) + res += "f"; + } + + return res; +} + +std::string CompilerGLSL::convert_double_to_string(const SPIRConstant &c, uint32_t col, uint32_t row) +{ + string res; + double double_value = c.scalar_f64(col, row); + + if (std::isnan(double_value) || std::isinf(double_value)) + { + // Use special representation. + if (!is_legacy()) + { + SPIRType out_type { OpTypeFloat }; + SPIRType in_type { OpTypeInt }; + out_type.basetype = SPIRType::Double; + in_type.basetype = SPIRType::UInt64; + out_type.vecsize = 1; + in_type.vecsize = 1; + out_type.width = 64; + in_type.width = 64; + + uint64_t u64_value = c.scalar_u64(col, row); + + if (options.es && options.version < 310) // GL_NV_gpu_shader5 fallback requires 310. + SPIRV_CROSS_THROW("64-bit integers not supported in ES profile before version 310."); + require_extension_internal("GL_ARB_gpu_shader_int64"); + + char print_buffer[64]; +#ifdef _WIN32 + sprintf(print_buffer, "0x%llx%s", static_cast<unsigned long long>(u64_value), + backend.long_long_literal_suffix ? "ull" : "ul"); +#else + snprintf(print_buffer, sizeof(print_buffer), "0x%llx%s", static_cast<unsigned long long>(u64_value), + backend.long_long_literal_suffix ? "ull" : "ul"); +#endif + + const char *comment = "inf"; + if (double_value == -numeric_limits<double>::infinity()) + comment = "-inf"; + else if (std::isnan(double_value)) + comment = "nan"; + res = join(bitcast_glsl_op(out_type, in_type), "(", print_buffer, " /* ", comment, " */)"); + } + else + { + if (options.es) + SPIRV_CROSS_THROW("FP64 not supported in ES profile."); + if (options.version < 400) + require_extension_internal("GL_ARB_gpu_shader_fp64"); + + if (double_value == numeric_limits<double>::infinity()) + { + if (backend.double_literal_suffix) + res = "(1.0lf / 0.0lf)"; + else + res = "(1.0 / 0.0)"; + } + else if (double_value == -numeric_limits<double>::infinity()) + { + if (backend.double_literal_suffix) + res = "(-1.0lf / 0.0lf)"; + else + res = "(-1.0 / 0.0)"; + } + else if (std::isnan(double_value)) + { + if (backend.double_literal_suffix) + res = "(0.0lf / 0.0lf)"; + else + res = "(0.0 / 0.0)"; + } + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } + } + else + { + res = format_double(double_value); + if (backend.double_literal_suffix) + res += "lf"; + } + + return res; +} + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +string CompilerGLSL::constant_expression_vector(const SPIRConstant &c, uint32_t vector) +{ + auto type = get<SPIRType>(c.constant_type); + type.columns = 1; + + auto scalar_type = type; + scalar_type.vecsize = 1; + + string res; + bool splat = backend.use_constructor_splatting && c.vector_size() > 1; + bool swizzle_splat = backend.can_swizzle_scalar && c.vector_size() > 1; + + if (!type_is_floating_point(type)) + { + // Cannot swizzle literal integers as a special case. + swizzle_splat = false; + } + + if (splat || swizzle_splat) + { + // Cannot use constant splatting if we have specialization constants somewhere in the vector. + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.specialization_constant_id(vector, i) != 0) + { + splat = false; + swizzle_splat = false; + break; + } + } + } + + if (splat || swizzle_splat) + { + if (type.width == 64) + { + uint64_t ident = c.scalar_u64(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + { + if (ident != c.scalar_u64(vector, i)) + { + splat = false; + swizzle_splat = false; + break; + } + } + } + else + { + uint32_t ident = c.scalar(vector, 0); + for (uint32_t i = 1; i < c.vector_size(); i++) + { + if (ident != c.scalar(vector, i)) + { + splat = false; + swizzle_splat = false; + } + } + } + } + + if (c.vector_size() > 1 && !swizzle_splat) + res += type_to_glsl(type) + "("; + + switch (type.basetype) + { + case SPIRType::Half: + if (splat || swizzle_splat) + { + res += convert_half_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_half_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Float: + if (splat || swizzle_splat) + { + res += convert_float_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_float_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Double: + if (splat || swizzle_splat) + { + res += convert_double_to_string(c, vector, 0); + if (swizzle_splat) + res = remap_swizzle(get<SPIRType>(c.constant_type), 1, res); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_double_to_string(c, vector, i); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Int64: + { + auto tmp = type; + tmp.vecsize = 1; + tmp.columns = 1; + auto int64_type = type_to_glsl(tmp); + + if (splat) + { + res += convert_to_string(c.scalar_i64(vector, 0), int64_type, backend.long_long_literal_suffix); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_to_string(c.scalar_i64(vector, i), int64_type, backend.long_long_literal_suffix); + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + } + + case SPIRType::UInt64: + if (splat) + { + res += convert_to_string(c.scalar_u64(vector, 0)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar_u64(vector, i)); + if (backend.long_long_literal_suffix) + res += "ull"; + else + res += "ul"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UInt: + if (splat) + { + res += convert_to_string(c.scalar(vector, 0)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. + if (c.scalar_i32(vector, 0) < 0) + SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made the literal negative."); + } + else if (backend.uint32_t_literal_suffix) + res += "u"; + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += convert_to_string(c.scalar(vector, i)); + if (is_legacy()) + { + // Fake unsigned constant literals with signed ones if possible. + // Things like array sizes, etc, tend to be unsigned even though they could just as easily be signed. + if (c.scalar_i32(vector, i) < 0) + SPIRV_CROSS_THROW("Tried to convert uint literal into int, but this made " + "the literal negative."); + } + else if (backend.uint32_t_literal_suffix) + res += "u"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Int: + if (splat) + res += convert_to_string(c.scalar_i32(vector, 0)); + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += convert_to_string(c.scalar_i32(vector, i)); + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UShort: + if (splat) + { + res += convert_to_string(c.scalar(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + if (*backend.uint16_t_literal_suffix) + { + res += convert_to_string(c.scalar_u16(vector, i)); + res += backend.uint16_t_literal_suffix; + } + else + { + // If backend doesn't have a literal suffix, we need to value cast. + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_u16(vector, i)); + res += ")"; + } + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Short: + if (splat) + { + res += convert_to_string(c.scalar_i16(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + if (*backend.int16_t_literal_suffix) + { + res += convert_to_string(c.scalar_i16(vector, i)); + res += backend.int16_t_literal_suffix; + } + else + { + // If backend doesn't have a literal suffix, we need to value cast. + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_i16(vector, i)); + res += ")"; + } + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::UByte: + if (splat) + { + res += convert_to_string(c.scalar_u8(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_u8(vector, i)); + res += ")"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::SByte: + if (splat) + { + res += convert_to_string(c.scalar_i8(vector, 0)); + } + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + { + res += type_to_glsl(scalar_type); + res += "("; + res += convert_to_string(c.scalar_i8(vector, i)); + res += ")"; + } + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + case SPIRType::Boolean: + if (splat) + res += c.scalar(vector, 0) ? "true" : "false"; + else + { + for (uint32_t i = 0; i < c.vector_size(); i++) + { + if (c.vector_size() > 1 && c.specialization_constant_id(vector, i) != 0) + res += to_expression(c.specialization_constant_id(vector, i)); + else + res += c.scalar(vector, i) ? "true" : "false"; + + if (i + 1 < c.vector_size()) + res += ", "; + } + } + break; + + default: + SPIRV_CROSS_THROW("Invalid constant expression basetype."); + } + + if (c.vector_size() > 1 && !swizzle_splat) + res += ")"; + + return res; +} + +SPIRExpression &CompilerGLSL::emit_uninitialized_temporary_expression(uint32_t type, uint32_t id) +{ + forced_temporaries.insert(id); + emit_uninitialized_temporary(type, id); + return set<SPIRExpression>(id, to_name(id), type, true); +} + +void CompilerGLSL::emit_uninitialized_temporary(uint32_t result_type, uint32_t result_id) +{ + // If we're declaring temporaries inside continue blocks, + // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. + if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id)) + { + auto &header = get<SPIRBlock>(current_continue_block->loop_dominator); + if (find_if(begin(header.declare_temporary), end(header.declare_temporary), + [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) { + return tmp.first == result_type && tmp.second == result_id; + }) == end(header.declare_temporary)) + { + header.declare_temporary.emplace_back(result_type, result_id); + hoisted_temporaries.insert(result_id); + force_recompile(); + } + } + else if (hoisted_temporaries.count(result_id) == 0) + { + auto &type = get<SPIRType>(result_type); + auto &flags = get_decoration_bitset(result_id); + + // The result_id has not been made into an expression yet, so use flags interface. + add_local_variable_name(result_id); + + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(result_type)); + + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), initializer, ";"); + } +} + +string CompilerGLSL::declare_temporary(uint32_t result_type, uint32_t result_id) +{ + auto &type = get<SPIRType>(result_type); + + // If we're declaring temporaries inside continue blocks, + // we must declare the temporary in the loop header so that the continue block can avoid declaring new variables. + if (!block_temporary_hoisting && current_continue_block && !hoisted_temporaries.count(result_id)) + { + auto &header = get<SPIRBlock>(current_continue_block->loop_dominator); + if (find_if(begin(header.declare_temporary), end(header.declare_temporary), + [result_type, result_id](const pair<uint32_t, uint32_t> &tmp) { + return tmp.first == result_type && tmp.second == result_id; + }) == end(header.declare_temporary)) + { + header.declare_temporary.emplace_back(result_type, result_id); + hoisted_temporaries.insert(result_id); + force_recompile_guarantee_forward_progress(); + } + + return join(to_name(result_id), " = "); + } + else if (hoisted_temporaries.count(result_id)) + { + // The temporary has already been declared earlier, so just "declare" the temporary by writing to it. + return join(to_name(result_id), " = "); + } + else + { + // The result_id has not been made into an expression yet, so use flags interface. + add_local_variable_name(result_id); + auto &flags = get_decoration_bitset(result_id); + return join(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(result_id)), " = "); + } +} + +bool CompilerGLSL::expression_is_forwarded(uint32_t id) const +{ + return forwarded_temporaries.count(id) != 0; +} + +bool CompilerGLSL::expression_suppresses_usage_tracking(uint32_t id) const +{ + return suppressed_usage_tracking.count(id) != 0; +} + +bool CompilerGLSL::expression_read_implies_multiple_reads(uint32_t id) const +{ + auto *expr = maybe_get<SPIRExpression>(id); + if (!expr) + return false; + + // If we're emitting code at a deeper loop level than when we emitted the expression, + // we're probably reading the same expression over and over. + return current_loop_level > expr->emitted_loop_level; +} + +SPIRExpression &CompilerGLSL::emit_op(uint32_t result_type, uint32_t result_id, const string &rhs, bool forwarding, + bool suppress_usage_tracking) +{ + if (forwarding && (forced_temporaries.find(result_id) == end(forced_temporaries))) + { + // Just forward it without temporary. + // If the forward is trivial, we do not force flushing to temporary for this expression. + forwarded_temporaries.insert(result_id); + if (suppress_usage_tracking) + suppressed_usage_tracking.insert(result_id); + + return set<SPIRExpression>(result_id, rhs, result_type, true); + } + else + { + // If expression isn't immutable, bind it to a temporary and make the new temporary immutable (they always are). + statement(declare_temporary(result_type, result_id), rhs, ";"); + return set<SPIRExpression>(result_id, to_name(result_id), result_type, true); + } +} + +void CompilerGLSL::emit_unary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) +{ + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, to_enclosed_unpacked_expression(op0)), forward); + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_unary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) +{ + auto &type = get<SPIRType>(result_type); + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(type_to_glsl(type), "(", op, to_enclosed_unpacked_expression(op0), ")"), forward); + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_mesh_tasks(SPIRBlock &block) +{ + statement("EmitMeshTasksEXT(", + to_unpacked_expression(block.mesh.groups[0]), ", ", + to_unpacked_expression(block.mesh.groups[1]), ", ", + to_unpacked_expression(block.mesh.groups[2]), ");"); +} + +void CompilerGLSL::emit_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, const char *op) +{ + // Various FP arithmetic opcodes such as add, sub, mul will hit this. + bool force_temporary_precise = backend.support_precise_qualifier && + has_decoration(result_id, DecorationNoContraction) && + type_is_floating_point(get<SPIRType>(result_type)); + bool forward = should_forward(op0) && should_forward(op1) && !force_temporary_precise; + + emit_op(result_type, result_id, + join(to_enclosed_unpacked_expression(op0), " ", op, " ", to_enclosed_unpacked_expression(op1)), forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_unrolled_unary_op(uint32_t result_type, uint32_t result_id, uint32_t operand, const char *op) +{ + auto &type = get<SPIRType>(result_type); + auto expr = type_to_glsl_constructor(type); + expr += '('; + for (uint32_t i = 0; i < type.vecsize; i++) + { + // Make sure to call to_expression multiple times to ensure + // that these expressions are properly flushed to temporaries if needed. + expr += op; + expr += to_extract_component_expression(operand, i); + + if (i + 1 < type.vecsize) + expr += ", "; + } + expr += ')'; + emit_op(result_type, result_id, expr, should_forward(operand)); + + inherit_expression_dependencies(result_id, operand); +} + +void CompilerGLSL::emit_unrolled_binary_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, bool negate, SPIRType::BaseType expected_type) +{ + auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); + + SPIRType target_type0 = type0; + SPIRType target_type1 = type1; + target_type0.basetype = expected_type; + target_type1.basetype = expected_type; + target_type0.vecsize = 1; + target_type1.vecsize = 1; + + auto &type = get<SPIRType>(result_type); + auto expr = type_to_glsl_constructor(type); + expr += '('; + for (uint32_t i = 0; i < type.vecsize; i++) + { + // Make sure to call to_expression multiple times to ensure + // that these expressions are properly flushed to temporaries if needed. + if (negate) + expr += "!("; + + if (expected_type != SPIRType::Unknown && type0.basetype != expected_type) + expr += bitcast_expression(target_type0, type0.basetype, to_extract_component_expression(op0, i)); + else + expr += to_extract_component_expression(op0, i); + + expr += ' '; + expr += op; + expr += ' '; + + if (expected_type != SPIRType::Unknown && type1.basetype != expected_type) + expr += bitcast_expression(target_type1, type1.basetype, to_extract_component_expression(op1, i)); + else + expr += to_extract_component_expression(op1, i); + + if (negate) + expr += ")"; + + if (i + 1 < type.vecsize) + expr += ", "; + } + expr += ')'; + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +SPIRType CompilerGLSL::binary_op_bitcast_helper(string &cast_op0, string &cast_op1, SPIRType::BaseType &input_type, + uint32_t op0, uint32_t op1, bool skip_cast_if_equal_type) +{ + auto &type0 = expression_type(op0); + auto &type1 = expression_type(op1); + + // We have to bitcast if our inputs are of different type, or if our types are not equal to expected inputs. + // For some functions like OpIEqual and INotEqual, we don't care if inputs are of different types than expected + // since equality test is exactly the same. + bool cast = (type0.basetype != type1.basetype) || (!skip_cast_if_equal_type && type0.basetype != input_type); + + // Create a fake type so we can bitcast to it. + // We only deal with regular arithmetic types here like int, uints and so on. + SPIRType expected_type{type0.op}; + expected_type.basetype = input_type; + expected_type.vecsize = type0.vecsize; + expected_type.columns = type0.columns; + expected_type.width = type0.width; + + if (cast) + { + cast_op0 = bitcast_glsl(expected_type, op0); + cast_op1 = bitcast_glsl(expected_type, op1); + } + else + { + // If we don't cast, our actual input type is that of the first (or second) argument. + cast_op0 = to_enclosed_unpacked_expression(op0); + cast_op1 = to_enclosed_unpacked_expression(op1); + input_type = type0.basetype; + } + + return expected_type; +} + +bool CompilerGLSL::emit_complex_bitcast(uint32_t result_type, uint32_t id, uint32_t op0) +{ + // Some bitcasts may require complex casting sequences, and are implemented here. + // Otherwise a simply unary function will do with bitcast_glsl_op. + + auto &output_type = get<SPIRType>(result_type); + auto &input_type = expression_type(op0); + string expr; + + if (output_type.basetype == SPIRType::Half && input_type.basetype == SPIRType::Float && input_type.vecsize == 1) + expr = join("unpackFloat2x16(floatBitsToUint(", to_unpacked_expression(op0), "))"); + else if (output_type.basetype == SPIRType::Float && input_type.basetype == SPIRType::Half && + input_type.vecsize == 2) + expr = join("uintBitsToFloat(packFloat2x16(", to_unpacked_expression(op0), "))"); + else + return false; + + emit_op(result_type, id, expr, should_forward(op0)); + return true; +} + +void CompilerGLSL::emit_binary_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, + bool skip_cast_if_equal_type, + bool implicit_integer_promotion) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get<SPIRType>(result_type); + + // We might have casted away from the result type, so bitcast again. + // For example, arithmetic right shift with uint inputs. + // Special case boolean outputs since relational opcodes output booleans instead of int/uint. + auto bitop = join(cast_op0, " ", op, " ", cast_op1); + string expr; + + if (implicit_integer_promotion) + { + // Simple value cast. + expr = join(type_to_glsl(out_type), '(', bitop, ')'); + } + else if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + expr = join(bitcast_glsl_op(out_type, expected_type), '(', bitop, ')'); + } + else + { + expr = std::move(bitop); + } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_unary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op) +{ + bool forward = should_forward(op0); + emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ")"), forward); + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_binary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + // Opaque types (e.g. OpTypeSampledImage) must always be forwarded in GLSL + const auto &type = get_type(result_type); + bool must_forward = type_is_opaque_value(type); + bool forward = must_forward || (should_forward(op0) && should_forward(op1)); + emit_op(result_type, result_id, join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ")"), + forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op) +{ + auto &type = get<SPIRType>(result_type); + if (type_is_floating_point(type)) + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Floating point atomics requires Vulkan semantics."); + if (options.es) + SPIRV_CROSS_THROW("Floating point atomics requires desktop GLSL."); + require_extension_internal("GL_EXT_shader_atomic_float"); + } + + forced_temporaries.insert(result_id); + emit_op(result_type, result_id, + join(op, "(", to_non_uniform_aware_expression(op0), ", ", + to_unpacked_expression(op1), ")"), false); + flush_all_atomic_capable_variables(); +} + +void CompilerGLSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, + uint32_t op0, uint32_t op1, uint32_t op2, + const char *op) +{ + forced_temporaries.insert(result_id); + emit_op(result_type, result_id, + join(op, "(", to_non_uniform_aware_expression(op0), ", ", + to_unpacked_expression(op1), ", ", to_unpacked_expression(op2), ")"), false); + flush_all_atomic_capable_variables(); +} + +void CompilerGLSL::emit_unary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, const char *op, + SPIRType::BaseType input_type, SPIRType::BaseType expected_result_type) +{ + auto &out_type = get<SPIRType>(result_type); + auto &expr_type = expression_type(op0); + auto expected_type = out_type; + + // Bit-widths might be different in unary cases because we use it for SConvert/UConvert and friends. + expected_type.basetype = input_type; + expected_type.width = expr_type.width; + + string cast_op; + if (expr_type.basetype != input_type) + { + if (expr_type.basetype == SPIRType::Boolean) + cast_op = join(type_to_glsl(expected_type), "(", to_unpacked_expression(op0), ")"); + else + cast_op = bitcast_glsl(expected_type, op0); + } + else + cast_op = to_unpacked_expression(op0); + + string expr; + if (out_type.basetype != expected_result_type) + { + expected_type.basetype = expected_result_type; + expected_type.width = out_type.width; + if (out_type.basetype == SPIRType::Boolean) + expr = type_to_glsl(out_type); + else + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0)); + inherit_expression_dependencies(result_id, op0); +} + +// Very special case. Handling bitfieldExtract requires us to deal with different bitcasts of different signs +// and different vector sizes all at once. Need a special purpose method here. +void CompilerGLSL::emit_trinary_func_op_bitextract(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, + SPIRType::BaseType expected_result_type, + SPIRType::BaseType input_type0, SPIRType::BaseType input_type1, + SPIRType::BaseType input_type2) +{ + auto &out_type = get<SPIRType>(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type0; + + string cast_op0 = + expression_type(op0).basetype != input_type0 ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); + + auto op1_expr = to_unpacked_expression(op1); + auto op2_expr = to_unpacked_expression(op2); + + // Use value casts here instead. Input must be exactly int or uint, but SPIR-V might be 16-bit. + expected_type.basetype = input_type1; + expected_type.vecsize = 1; + string cast_op1 = expression_type(op1).basetype != input_type1 ? + join(type_to_glsl_constructor(expected_type), "(", op1_expr, ")") : + op1_expr; + + expected_type.basetype = input_type2; + expected_type.vecsize = 1; + string cast_op2 = expression_type(op2).basetype != input_type2 ? + join(type_to_glsl_constructor(expected_type), "(", op2_expr, ")") : + op2_expr; + + string expr; + if (out_type.basetype != expected_result_type) + { + expected_type.vecsize = out_type.vecsize; + expected_type.basetype = expected_result_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} + +void CompilerGLSL::emit_trinary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op, SPIRType::BaseType input_type) +{ + auto &out_type = get<SPIRType>(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type; + string cast_op0 = + expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); + string cast_op1 = + expression_type(op1).basetype != input_type ? bitcast_glsl(expected_type, op1) : to_unpacked_expression(op1); + string cast_op2 = + expression_type(op2).basetype != input_type ? bitcast_glsl(expected_type, op2) : to_unpacked_expression(op2); + + string expr; + if (out_type.basetype != input_type) + { + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ", ", cast_op2, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1) && should_forward(op2)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} + +void CompilerGLSL::emit_binary_func_op_cast_clustered(uint32_t result_type, uint32_t result_id, uint32_t op0, + uint32_t op1, const char *op, SPIRType::BaseType input_type) +{ + // Special purpose method for implementing clustered subgroup opcodes. + // Main difference is that op1 does not participate in any casting, it needs to be a literal. + auto &out_type = get<SPIRType>(result_type); + auto expected_type = out_type; + expected_type.basetype = input_type; + string cast_op0 = + expression_type(op0).basetype != input_type ? bitcast_glsl(expected_type, op0) : to_unpacked_expression(op0); + + string expr; + if (out_type.basetype != input_type) + { + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", to_expression(op1), ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0)); + inherit_expression_dependencies(result_id, op0); +} + +void CompilerGLSL::emit_binary_func_op_cast(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + const char *op, SPIRType::BaseType input_type, bool skip_cast_if_equal_type) +{ + string cast_op0, cast_op1; + auto expected_type = binary_op_bitcast_helper(cast_op0, cast_op1, input_type, op0, op1, skip_cast_if_equal_type); + auto &out_type = get<SPIRType>(result_type); + + // Special case boolean outputs since relational opcodes output booleans instead of int/uint. + string expr; + if (out_type.basetype != input_type && out_type.basetype != SPIRType::Boolean) + { + expected_type.basetype = input_type; + expr = bitcast_glsl_op(out_type, expected_type); + expr += '('; + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + expr += ')'; + } + else + { + expr += join(op, "(", cast_op0, ", ", cast_op1, ")"); + } + + emit_op(result_type, result_id, expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); +} + +void CompilerGLSL::emit_trinary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2); + emit_op(result_type, result_id, + join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", + to_unpacked_expression(op2), ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); +} + +void CompilerGLSL::emit_quaternary_func_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, uint32_t op3, const char *op) +{ + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); + emit_op(result_type, result_id, + join(op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), ", ", + to_unpacked_expression(op2), ", ", to_unpacked_expression(op3), ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + inherit_expression_dependencies(result_id, op3); +} + +void CompilerGLSL::emit_bitfield_insert_op(uint32_t result_type, uint32_t result_id, uint32_t op0, uint32_t op1, + uint32_t op2, uint32_t op3, const char *op, + SPIRType::BaseType offset_count_type) +{ + // Only need to cast offset/count arguments. Types of base/insert must be same as result type, + // and bitfieldInsert is sign invariant. + bool forward = should_forward(op0) && should_forward(op1) && should_forward(op2) && should_forward(op3); + + auto op0_expr = to_unpacked_expression(op0); + auto op1_expr = to_unpacked_expression(op1); + auto op2_expr = to_unpacked_expression(op2); + auto op3_expr = to_unpacked_expression(op3); + + assert(offset_count_type == SPIRType::UInt || offset_count_type == SPIRType::Int); + SPIRType target_type { OpTypeInt }; + target_type.width = 32; + target_type.vecsize = 1; + target_type.basetype = offset_count_type; + + if (expression_type(op2).basetype != offset_count_type) + { + // Value-cast here. Input might be 16-bit. GLSL requires int. + op2_expr = join(type_to_glsl_constructor(target_type), "(", op2_expr, ")"); + } + + if (expression_type(op3).basetype != offset_count_type) + { + // Value-cast here. Input might be 16-bit. GLSL requires int. + op3_expr = join(type_to_glsl_constructor(target_type), "(", op3_expr, ")"); + } + + emit_op(result_type, result_id, join(op, "(", op0_expr, ", ", op1_expr, ", ", op2_expr, ", ", op3_expr, ")"), + forward); + + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + inherit_expression_dependencies(result_id, op2); + inherit_expression_dependencies(result_id, op3); +} + +string CompilerGLSL::legacy_tex_op(const std::string &op, const SPIRType &imgtype, uint32_t tex) +{ + const char *type; + switch (imgtype.image.dim) + { + case spv::Dim1D: + // Force 2D path for ES. + if (options.es) + type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; + else + type = (imgtype.image.arrayed && !options.es) ? "1DArray" : "1D"; + break; + case spv::Dim2D: + type = (imgtype.image.arrayed && !options.es) ? "2DArray" : "2D"; + break; + case spv::Dim3D: + type = "3D"; + break; + case spv::DimCube: + type = "Cube"; + break; + case spv::DimRect: + type = "2DRect"; + break; + case spv::DimBuffer: + type = "Buffer"; + break; + case spv::DimSubpassData: + type = "2D"; + break; + default: + type = ""; + break; + } + + // In legacy GLSL, an extension is required for textureLod in the fragment + // shader or textureGrad anywhere. + bool legacy_lod_ext = false; + auto &execution = get_entry_point(); + if (op == "textureGrad" || op == "textureProjGrad" || + ((op == "textureLod" || op == "textureProjLod") && execution.model != ExecutionModelVertex)) + { + if (is_legacy_es()) + { + legacy_lod_ext = true; + require_extension_internal("GL_EXT_shader_texture_lod"); + } + else if (is_legacy_desktop()) + require_extension_internal("GL_ARB_shader_texture_lod"); + } + + if (op == "textureLodOffset" || op == "textureProjLodOffset") + { + if (is_legacy_es()) + SPIRV_CROSS_THROW(join(op, " not allowed in legacy ES")); + + require_extension_internal("GL_EXT_gpu_shader4"); + } + + // GLES has very limited support for shadow samplers. + // Basically shadow2D and shadow2DProj work through EXT_shadow_samplers, + // everything else can just throw + bool is_comparison = is_depth_image(imgtype, tex); + if (is_comparison && is_legacy_es()) + { + if (op == "texture" || op == "textureProj") + require_extension_internal("GL_EXT_shadow_samplers"); + else + SPIRV_CROSS_THROW(join(op, " not allowed on depth samplers in legacy ES")); + + if (imgtype.image.dim == spv::DimCube) + return "shadowCubeNV"; + } + + if (op == "textureSize") + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("textureSize not supported in legacy ES"); + if (is_comparison) + SPIRV_CROSS_THROW("textureSize not supported on shadow sampler in legacy GLSL"); + require_extension_internal("GL_EXT_gpu_shader4"); + } + + if (op == "texelFetch" && is_legacy_es()) + SPIRV_CROSS_THROW("texelFetch not supported in legacy ES"); + + bool is_es_and_depth = is_legacy_es() && is_comparison; + std::string type_prefix = is_comparison ? "shadow" : "texture"; + + if (op == "texture") + return is_es_and_depth ? join(type_prefix, type, "EXT") : join(type_prefix, type); + else if (op == "textureLod") + return join(type_prefix, type, legacy_lod_ext ? "LodEXT" : "Lod"); + else if (op == "textureProj") + return join(type_prefix, type, is_es_and_depth ? "ProjEXT" : "Proj"); + else if (op == "textureGrad") + return join(type_prefix, type, is_legacy_es() ? "GradEXT" : is_legacy_desktop() ? "GradARB" : "Grad"); + else if (op == "textureProjLod") + return join(type_prefix, type, legacy_lod_ext ? "ProjLodEXT" : "ProjLod"); + else if (op == "textureLodOffset") + return join(type_prefix, type, "LodOffset"); + else if (op == "textureProjGrad") + return join(type_prefix, type, + is_legacy_es() ? "ProjGradEXT" : is_legacy_desktop() ? "ProjGradARB" : "ProjGrad"); + else if (op == "textureProjLodOffset") + return join(type_prefix, type, "ProjLodOffset"); + else if (op == "textureSize") + return join("textureSize", type); + else if (op == "texelFetch") + return join("texelFetch", type); + else + { + SPIRV_CROSS_THROW(join("Unsupported legacy texture op: ", op)); + } +} + +bool CompilerGLSL::to_trivial_mix_op(const SPIRType &type, string &op, uint32_t left, uint32_t right, uint32_t lerp) +{ + auto *cleft = maybe_get<SPIRConstant>(left); + auto *cright = maybe_get<SPIRConstant>(right); + auto &lerptype = expression_type(lerp); + + // If our targets aren't constants, we cannot use construction. + if (!cleft || !cright) + return false; + + // If our targets are spec constants, we cannot use construction. + if (cleft->specialization || cright->specialization) + return false; + + auto &value_type = get<SPIRType>(cleft->constant_type); + + if (lerptype.basetype != SPIRType::Boolean) + return false; + if (value_type.basetype == SPIRType::Struct || is_array(value_type)) + return false; + if (!backend.use_constructor_splatting && value_type.vecsize != lerptype.vecsize) + return false; + + // Only valid way in SPIR-V 1.4 to use matrices in select is a scalar select. + // matrix(scalar) constructor fills in diagnonals, so gets messy very quickly. + // Just avoid this case. + if (value_type.columns > 1) + return false; + + // If our bool selects between 0 and 1, we can cast from bool instead, making our trivial constructor. + bool ret = true; + for (uint32_t row = 0; ret && row < value_type.vecsize; row++) + { + switch (type.basetype) + { + case SPIRType::Short: + case SPIRType::UShort: + ret = cleft->scalar_u16(0, row) == 0 && cright->scalar_u16(0, row) == 1; + break; + + case SPIRType::Int: + case SPIRType::UInt: + ret = cleft->scalar(0, row) == 0 && cright->scalar(0, row) == 1; + break; + + case SPIRType::Half: + ret = cleft->scalar_f16(0, row) == 0.0f && cright->scalar_f16(0, row) == 1.0f; + break; + + case SPIRType::Float: + ret = cleft->scalar_f32(0, row) == 0.0f && cright->scalar_f32(0, row) == 1.0f; + break; + + case SPIRType::Double: + ret = cleft->scalar_f64(0, row) == 0.0 && cright->scalar_f64(0, row) == 1.0; + break; + + case SPIRType::Int64: + case SPIRType::UInt64: + ret = cleft->scalar_u64(0, row) == 0 && cright->scalar_u64(0, row) == 1; + break; + + default: + ret = false; + break; + } + } + + if (ret) + op = type_to_glsl_constructor(type); + return ret; +} + +string CompilerGLSL::to_ternary_expression(const SPIRType &restype, uint32_t select, uint32_t true_value, + uint32_t false_value) +{ + string expr; + auto &lerptype = expression_type(select); + + if (lerptype.vecsize == 1) + expr = join(to_enclosed_expression(select), " ? ", to_enclosed_pointer_expression(true_value), " : ", + to_enclosed_pointer_expression(false_value)); + else + { + auto swiz = [this](uint32_t expression, uint32_t i) { return to_extract_component_expression(expression, i); }; + + expr = type_to_glsl_constructor(restype); + expr += "("; + for (uint32_t i = 0; i < restype.vecsize; i++) + { + expr += swiz(select, i); + expr += " ? "; + expr += swiz(true_value, i); + expr += " : "; + expr += swiz(false_value, i); + if (i + 1 < restype.vecsize) + expr += ", "; + } + expr += ")"; + } + + return expr; +} + +void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp) +{ + auto &lerptype = expression_type(lerp); + auto &restype = get<SPIRType>(result_type); + + // If this results in a variable pointer, assume it may be written through. + if (restype.pointer) + { + register_write(left); + register_write(right); + } + + string mix_op; + bool has_boolean_mix = *backend.boolean_mix_function && + ((options.es && options.version >= 310) || (!options.es && options.version >= 450)); + bool trivial_mix = to_trivial_mix_op(restype, mix_op, left, right, lerp); + + // Cannot use boolean mix when the lerp argument is just one boolean, + // fall back to regular trinary statements. + if (lerptype.vecsize == 1) + has_boolean_mix = false; + + // If we can reduce the mix to a simple cast, do so. + // This helps for cases like int(bool), uint(bool) which is implemented with + // OpSelect bool 1 0. + if (trivial_mix) + { + emit_unary_func_op(result_type, id, lerp, mix_op.c_str()); + } + else if (!has_boolean_mix && lerptype.basetype == SPIRType::Boolean) + { + // Boolean mix not supported on desktop without extension. + // Was added in OpenGL 4.5 with ES 3.1 compat. + // + // Could use GL_EXT_shader_integer_mix on desktop at least, + // but Apple doesn't support it. :( + // Just implement it as ternary expressions. + auto expr = to_ternary_expression(get<SPIRType>(result_type), lerp, right, left); + emit_op(result_type, id, expr, should_forward(left) && should_forward(right) && should_forward(lerp)); + inherit_expression_dependencies(id, left); + inherit_expression_dependencies(id, right); + inherit_expression_dependencies(id, lerp); + } + else if (lerptype.basetype == SPIRType::Boolean) + emit_trinary_func_op(result_type, id, left, right, lerp, backend.boolean_mix_function); + else + emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); +} + +string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id) +{ + // Keep track of the array indices we have used to load the image. + // We'll need to use the same array index into the combined image sampler array. + auto image_expr = to_non_uniform_aware_expression(image_id); + string array_expr; + auto array_index = image_expr.find_first_of('['); + if (array_index != string::npos) + array_expr = image_expr.substr(array_index, string::npos); + + auto &args = current_function->arguments; + + // For GLSL and ESSL targets, we must enumerate all possible combinations for sampler2D(texture2D, sampler) and redirect + // all possible combinations into new sampler2D uniforms. + auto *image = maybe_get_backing_variable(image_id); + auto *samp = maybe_get_backing_variable(samp_id); + if (image) + image_id = image->self; + if (samp) + samp_id = samp->self; + + auto image_itr = find_if(begin(args), end(args), + [image_id](const SPIRFunction::Parameter ¶m) { return image_id == param.id; }); + + auto sampler_itr = find_if(begin(args), end(args), + [samp_id](const SPIRFunction::Parameter ¶m) { return samp_id == param.id; }); + + if (image_itr != end(args) || sampler_itr != end(args)) + { + // If any parameter originates from a parameter, we will find it in our argument list. + bool global_image = image_itr == end(args); + bool global_sampler = sampler_itr == end(args); + VariableID iid = global_image ? image_id : VariableID(uint32_t(image_itr - begin(args))); + VariableID sid = global_sampler ? samp_id : VariableID(uint32_t(sampler_itr - begin(args))); + + auto &combined = current_function->combined_parameters; + auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { + return p.global_image == global_image && p.global_sampler == global_sampler && p.image_id == iid && + p.sampler_id == sid; + }); + + if (itr != end(combined)) + return to_expression(itr->id) + array_expr; + else + { + SPIRV_CROSS_THROW("Cannot find mapping for combined sampler parameter, was " + "build_combined_image_samplers() used " + "before compile() was called?"); + } + } + else + { + // For global sampler2D, look directly at the global remapping table. + auto &mapping = combined_image_samplers; + auto itr = find_if(begin(mapping), end(mapping), [image_id, samp_id](const CombinedImageSampler &combined) { + return combined.image_id == image_id && combined.sampler_id == samp_id; + }); + + if (itr != end(combined_image_samplers)) + return to_expression(itr->combined_id) + array_expr; + else + { + SPIRV_CROSS_THROW("Cannot find mapping for combined sampler, was build_combined_image_samplers() used " + "before compile() was called?"); + } + } +} + +bool CompilerGLSL::is_supported_subgroup_op_in_opengl(spv::Op op, const uint32_t *ops) +{ + switch (op) + { + case OpGroupNonUniformElect: + case OpGroupNonUniformBallot: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + case OpControlBarrier: + case OpMemoryBarrier: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformInverseBallot: + return true; + case OpGroupNonUniformIAdd: + case OpGroupNonUniformFAdd: + case OpGroupNonUniformIMul: + case OpGroupNonUniformFMul: + { + const GroupOperation operation = static_cast<GroupOperation>(ops[3]); + if (operation == GroupOperationReduce || operation == GroupOperationInclusiveScan || + operation == GroupOperationExclusiveScan) + { + return true; + } + else + { + return false; + } + } + default: + return false; + } +} + +void CompilerGLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) +{ + if (options.vulkan_semantics && combined_image_samplers.empty()) + { + emit_binary_func_op(result_type, result_id, image_id, samp_id, + type_to_glsl(get<SPIRType>(result_type), result_id).c_str()); + } + else + { + // Make sure to suppress usage tracking. It is illegal to create temporaries of opaque types. + emit_op(result_type, result_id, to_combined_image_sampler(image_id, samp_id), true, true); + } + + // Make sure to suppress usage tracking and any expression invalidation. + // It is illegal to create temporaries of opaque types. + forwarded_temporaries.erase(result_id); +} + +static inline bool image_opcode_is_sample_no_dref(Op op) +{ + switch (op) + { + case OpImageSampleExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageFetch: + case OpImageRead: + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseFetch: + case OpImageSparseRead: + return true; + + default: + return false; + } +} + +void CompilerGLSL::emit_sparse_feedback_temporaries(uint32_t result_type_id, uint32_t id, uint32_t &feedback_id, + uint32_t &texel_id) +{ + // Need to allocate two temporaries. + if (options.es) + SPIRV_CROSS_THROW("Sparse texture feedback is not supported on ESSL."); + require_extension_internal("GL_ARB_sparse_texture2"); + + auto &temps = extra_sub_expressions[id]; + if (temps == 0) + temps = ir.increase_bound_by(2); + + feedback_id = temps + 0; + texel_id = temps + 1; + + auto &return_type = get<SPIRType>(result_type_id); + if (return_type.basetype != SPIRType::Struct || return_type.member_types.size() != 2) + SPIRV_CROSS_THROW("Invalid return type for sparse feedback."); + emit_uninitialized_temporary(return_type.member_types[0], feedback_id); + emit_uninitialized_temporary(return_type.member_types[1], texel_id); +} + +uint32_t CompilerGLSL::get_sparse_feedback_texel_id(uint32_t id) const +{ + auto itr = extra_sub_expressions.find(id); + if (itr == extra_sub_expressions.end()) + return 0; + else + return itr->second + 1; +} + +void CompilerGLSL::emit_texture_op(const Instruction &i, bool sparse) +{ + auto *ops = stream(i); + auto op = static_cast<Op>(i.op); + + SmallVector<uint32_t> inherited_expressions; + + uint32_t result_type_id = ops[0]; + uint32_t id = ops[1]; + auto &return_type = get<SPIRType>(result_type_id); + + uint32_t sparse_code_id = 0; + uint32_t sparse_texel_id = 0; + if (sparse) + emit_sparse_feedback_temporaries(result_type_id, id, sparse_code_id, sparse_texel_id); + + bool forward = false; + string expr = to_texture_op(i, sparse, &forward, inherited_expressions); + + if (sparse) + { + statement(to_expression(sparse_code_id), " = ", expr, ";"); + expr = join(type_to_glsl(return_type), "(", to_expression(sparse_code_id), ", ", to_expression(sparse_texel_id), + ")"); + forward = true; + inherited_expressions.clear(); + } + + emit_op(result_type_id, id, expr, forward); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(id, inherit); + + // Do not register sparse ops as control dependent as they are always lowered to a temporary. + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(id); + break; + + default: + break; + } +} + +std::string CompilerGLSL::to_texture_op(const Instruction &i, bool sparse, bool *forward, + SmallVector<uint32_t> &inherited_expressions) +{ + auto *ops = stream(i); + auto op = static_cast<Op>(i.op); + uint32_t length = i.length; + + uint32_t result_type_id = ops[0]; + VariableID img = ops[2]; + uint32_t coord = ops[3]; + uint32_t dref = 0; + uint32_t comp = 0; + bool gather = false; + bool proj = false; + bool fetch = false; + bool nonuniform_expression = false; + const uint32_t *opt = nullptr; + + auto &result_type = get<SPIRType>(result_type_id); + + inherited_expressions.push_back(coord); + if (has_decoration(img, DecorationNonUniform) && !maybe_get_backing_variable(img)) + nonuniform_expression = true; + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + break; + + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + proj = true; + break; + + case OpImageDrefGather: + case OpImageSparseDrefGather: + dref = ops[4]; + opt = &ops[5]; + length -= 5; + gather = true; + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("textureGather requires ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("textureGather with depth compare requires GLSL 400."); + break; + + case OpImageGather: + case OpImageSparseGather: + comp = ops[4]; + opt = &ops[5]; + length -= 5; + gather = true; + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("textureGather requires ESSL 310."); + else if (!options.es && options.version < 400) + { + if (!expression_is_constant_null(comp)) + SPIRV_CROSS_THROW("textureGather with component requires GLSL 400."); + require_extension_internal("GL_ARB_texture_gather"); + } + break; + + case OpImageFetch: + case OpImageSparseFetch: + case OpImageRead: // Reads == fetches in Metal (other langs will not get here) + opt = &ops[4]; + length -= 4; + fetch = true; + break; + + case OpImageSampleProjImplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleProjExplicitLod: + opt = &ops[4]; + length -= 4; + proj = true; + break; + + default: + opt = &ops[4]; + length -= 4; + break; + } + + // Bypass pointers because we need the real image struct + auto &type = expression_type(img); + auto &imgtype = get<SPIRType>(type.self); + + uint32_t coord_components = 0; + switch (imgtype.image.dim) + { + case spv::Dim1D: + coord_components = 1; + break; + case spv::Dim2D: + coord_components = 2; + break; + case spv::Dim3D: + coord_components = 3; + break; + case spv::DimCube: + coord_components = 3; + break; + case spv::DimBuffer: + coord_components = 1; + break; + default: + coord_components = 2; + break; + } + + if (dref) + inherited_expressions.push_back(dref); + + if (proj) + coord_components++; + if (imgtype.image.arrayed) + coord_components++; + + uint32_t bias = 0; + uint32_t lod = 0; + uint32_t grad_x = 0; + uint32_t grad_y = 0; + uint32_t coffset = 0; + uint32_t offset = 0; + uint32_t coffsets = 0; + uint32_t sample = 0; + uint32_t minlod = 0; + uint32_t flags = 0; + + if (length) + { + flags = *opt++; + length--; + } + + auto test = [&](uint32_t &v, uint32_t flag) { + if (length && (flags & flag)) + { + v = *opt++; + inherited_expressions.push_back(v); + length--; + } + }; + + test(bias, ImageOperandsBiasMask); + test(lod, ImageOperandsLodMask); + test(grad_x, ImageOperandsGradMask); + test(grad_y, ImageOperandsGradMask); + test(coffset, ImageOperandsConstOffsetMask); + test(offset, ImageOperandsOffsetMask); + test(coffsets, ImageOperandsConstOffsetsMask); + test(sample, ImageOperandsSampleMask); + test(minlod, ImageOperandsMinLodMask); + + TextureFunctionBaseArguments base_args = {}; + base_args.img = img; + base_args.imgtype = &imgtype; + base_args.is_fetch = fetch != 0; + base_args.is_gather = gather != 0; + base_args.is_proj = proj != 0; + + string expr; + TextureFunctionNameArguments name_args = {}; + + name_args.base = base_args; + name_args.has_array_offsets = coffsets != 0; + name_args.has_offset = coffset != 0 || offset != 0; + name_args.has_grad = grad_x != 0 || grad_y != 0; + name_args.has_dref = dref != 0; + name_args.is_sparse_feedback = sparse; + name_args.has_min_lod = minlod != 0; + name_args.lod = lod; + expr += to_function_name(name_args); + expr += "("; + + uint32_t sparse_texel_id = 0; + if (sparse) + sparse_texel_id = get_sparse_feedback_texel_id(ops[1]); + + TextureFunctionArguments args = {}; + args.base = base_args; + args.coord = coord; + args.coord_components = coord_components; + args.dref = dref; + args.grad_x = grad_x; + args.grad_y = grad_y; + args.lod = lod; + args.has_array_offsets = coffsets != 0; + + if (coffsets) + args.offset = coffsets; + else if (coffset) + args.offset = coffset; + else + args.offset = offset; + + args.bias = bias; + args.component = comp; + args.sample = sample; + args.sparse_texel = sparse_texel_id; + args.min_lod = minlod; + args.nonuniform_expression = nonuniform_expression; + expr += to_function_args(args, forward); + expr += ")"; + + // texture(samplerXShadow) returns float. shadowX() returns vec4, but only in desktop GLSL. Swizzle here. + if (is_legacy() && !options.es && is_depth_image(imgtype, img)) + expr += ".r"; + + // Sampling from a texture which was deduced to be a depth image, might actually return 1 component here. + // Remap back to 4 components as sampling opcodes expect. + if (backend.comparison_image_samples_scalar && image_opcode_is_sample_no_dref(op)) + { + bool image_is_depth = false; + const auto *combined = maybe_get<SPIRCombinedImageSampler>(img); + VariableID image_id = combined ? combined->image : img; + + if (combined && is_depth_image(imgtype, combined->image)) + image_is_depth = true; + else if (is_depth_image(imgtype, img)) + image_is_depth = true; + + // We must also check the backing variable for the image. + // We might have loaded an OpImage, and used that handle for two different purposes. + // Once with comparison, once without. + auto *image_variable = maybe_get_backing_variable(image_id); + if (image_variable && is_depth_image(get<SPIRType>(image_variable->basetype), image_variable->self)) + image_is_depth = true; + + if (image_is_depth) + expr = remap_swizzle(result_type, 1, expr); + } + + if (!sparse && !backend.support_small_type_sampling_result && result_type.width < 32) + { + // Just value cast (narrowing) to expected type since we cannot rely on narrowing to work automatically. + // Hopefully compiler picks this up and converts the texturing instruction to the appropriate precision. + expr = join(type_to_glsl_constructor(result_type), "(", expr, ")"); + } + + // Deals with reads from MSL. We might need to downconvert to fewer components. + if (op == OpImageRead) + expr = remap_swizzle(result_type, 4, expr); + + return expr; +} + +bool CompilerGLSL::expression_is_constant_null(uint32_t id) const +{ + auto *c = maybe_get<SPIRConstant>(id); + if (!c) + return false; + return c->constant_is_null(); +} + +bool CompilerGLSL::expression_is_non_value_type_array(uint32_t ptr) +{ + auto &type = expression_type(ptr); + if (!is_array(get_pointee_type(type))) + return false; + + if (!backend.array_is_value_type) + return true; + + auto *var = maybe_get_backing_variable(ptr); + if (!var) + return false; + + auto &backed_type = get<SPIRType>(var->basetype); + return !backend.array_is_value_type_in_buffer_blocks && backed_type.basetype == SPIRType::Struct && + has_member_decoration(backed_type.self, 0, DecorationOffset); +} + +// Returns the function name for a texture sampling function for the specified image and sampling characteristics. +// For some subclasses, the function is a method on the specified image. +string CompilerGLSL::to_function_name(const TextureFunctionNameArguments &args) +{ + if (args.has_min_lod) + { + if (options.es) + SPIRV_CROSS_THROW("Sparse residency is not supported in ESSL."); + require_extension_internal("GL_ARB_sparse_texture_clamp"); + } + + string fname; + auto &imgtype = *args.base.imgtype; + VariableID tex = args.base.img; + + // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. + // To emulate this, we will have to use textureGrad with a constant gradient of 0. + // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. + // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. + bool workaround_lod_array_shadow_as_grad = false; + if (((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && + is_depth_image(imgtype, tex) && args.lod && !args.base.is_fetch) + { + if (!expression_is_constant_null(args.lod)) + { + SPIRV_CROSS_THROW("textureLod on sampler2DArrayShadow is not constant 0.0. This cannot be " + "expressed in GLSL."); + } + workaround_lod_array_shadow_as_grad = true; + } + + if (args.is_sparse_feedback) + fname += "sparse"; + + if (args.base.is_fetch) + fname += args.is_sparse_feedback ? "TexelFetch" : "texelFetch"; + else + { + fname += args.is_sparse_feedback ? "Texture" : "texture"; + + if (args.base.is_gather) + fname += "Gather"; + if (args.has_array_offsets) + fname += "Offsets"; + if (args.base.is_proj) + fname += "Proj"; + if (args.has_grad || workaround_lod_array_shadow_as_grad) + fname += "Grad"; + if (args.lod != 0 && !workaround_lod_array_shadow_as_grad) + fname += "Lod"; + } + + if (args.has_offset) + fname += "Offset"; + + if (args.has_min_lod) + fname += "Clamp"; + + if (args.is_sparse_feedback || args.has_min_lod) + fname += "ARB"; + + return (is_legacy() && !args.base.is_gather) ? legacy_tex_op(fname, imgtype, tex) : fname; +} + +std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) +{ + auto *var = maybe_get_backing_variable(id); + + // If we are fetching from a plain OpTypeImage, we must combine with a dummy sampler in GLSL. + // In Vulkan GLSL, we can make use of the newer GL_EXT_samplerless_texture_functions. + if (var) + { + auto &type = get<SPIRType>(var->basetype); + if (type.basetype == SPIRType::Image && type.image.sampled == 1 && type.image.dim != DimBuffer) + { + if (options.vulkan_semantics) + { + if (dummy_sampler_id) + { + // Don't need to consider Shadow state since the dummy sampler is always non-shadow. + auto sampled_type = type; + sampled_type.basetype = SPIRType::SampledImage; + return join(type_to_glsl(sampled_type), "(", to_non_uniform_aware_expression(id), ", ", + to_expression(dummy_sampler_id), ")"); + } + else + { + // Newer glslang supports this extension to deal with texture2D as argument to texture functions. + require_extension_internal("GL_EXT_samplerless_texture_functions"); + } + } + else + { + if (!dummy_sampler_id) + SPIRV_CROSS_THROW("Cannot find dummy sampler ID. Was " + "build_dummy_sampler_for_combined_images() called?"); + + return to_combined_image_sampler(id, dummy_sampler_id); + } + } + } + + return to_non_uniform_aware_expression(id); +} + +// Returns the function args for a texture sampling function for the specified image and sampling characteristics. +string CompilerGLSL::to_function_args(const TextureFunctionArguments &args, bool *p_forward) +{ + VariableID img = args.base.img; + auto &imgtype = *args.base.imgtype; + + string farg_str; + if (args.base.is_fetch) + farg_str = convert_separate_image_to_expression(img); + else + farg_str = to_non_uniform_aware_expression(img); + + if (args.nonuniform_expression && farg_str.find_first_of('[') != string::npos) + { + // Only emit nonuniformEXT() wrapper if the underlying expression is arrayed in some way. + farg_str = join(backend.nonuniform_qualifier, "(", farg_str, ")"); + } + + bool swizz_func = backend.swizzle_is_function; + auto swizzle = [swizz_func](uint32_t comps, uint32_t in_comps) -> const char * { + if (comps == in_comps) + return ""; + + switch (comps) + { + case 1: + return ".x"; + case 2: + return swizz_func ? ".xy()" : ".xy"; + case 3: + return swizz_func ? ".xyz()" : ".xyz"; + default: + return ""; + } + }; + + bool forward = should_forward(args.coord); + + // The IR can give us more components than we need, so chop them off as needed. + auto swizzle_expr = swizzle(args.coord_components, expression_type(args.coord).vecsize); + // Only enclose the UV expression if needed. + auto coord_expr = + (*swizzle_expr == '\0') ? to_expression(args.coord) : (to_enclosed_expression(args.coord) + swizzle_expr); + + // texelFetch only takes int, not uint. + auto &coord_type = expression_type(args.coord); + if (coord_type.basetype == SPIRType::UInt) + { + auto expected_type = coord_type; + expected_type.vecsize = args.coord_components; + expected_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(expected_type, coord_type.basetype, coord_expr); + } + + // textureLod on sampler2DArrayShadow and samplerCubeShadow does not exist in GLSL for some reason. + // To emulate this, we will have to use textureGrad with a constant gradient of 0. + // The workaround will assert that the LOD is in fact constant 0, or we cannot emit correct code. + // This happens for HLSL SampleCmpLevelZero on Texture2DArray and TextureCube. + bool workaround_lod_array_shadow_as_grad = + ((imgtype.image.arrayed && imgtype.image.dim == Dim2D) || imgtype.image.dim == DimCube) && + is_depth_image(imgtype, img) && args.lod != 0 && !args.base.is_fetch; + + if (args.dref) + { + forward = forward && should_forward(args.dref); + + // SPIR-V splits dref and coordinate. + if (args.base.is_gather || + args.coord_components == 4) // GLSL also splits the arguments in two. Same for textureGather. + { + farg_str += ", "; + farg_str += to_expression(args.coord); + farg_str += ", "; + farg_str += to_expression(args.dref); + } + else if (args.base.is_proj) + { + // Have to reshuffle so we get vec4(coord, dref, proj), special case. + // Other shading languages splits up the arguments for coord and compare value like SPIR-V. + // The coordinate type for textureProj shadow is always vec4 even for sampler1DShadow. + farg_str += ", vec4("; + + if (imgtype.image.dim == Dim1D) + { + // Could reuse coord_expr, but we will mess up the temporary usage checking. + farg_str += to_enclosed_expression(args.coord) + ".x"; + farg_str += ", "; + farg_str += "0.0, "; + farg_str += to_expression(args.dref); + farg_str += ", "; + farg_str += to_enclosed_expression(args.coord) + ".y)"; + } + else if (imgtype.image.dim == Dim2D) + { + // Could reuse coord_expr, but we will mess up the temporary usage checking. + farg_str += to_enclosed_expression(args.coord) + (swizz_func ? ".xy()" : ".xy"); + farg_str += ", "; + farg_str += to_expression(args.dref); + farg_str += ", "; + farg_str += to_enclosed_expression(args.coord) + ".z)"; + } + else + SPIRV_CROSS_THROW("Invalid type for textureProj with shadow."); + } + else + { + // Create a composite which merges coord/dref into a single vector. + auto type = expression_type(args.coord); + type.vecsize = args.coord_components + 1; + if (imgtype.image.dim == Dim1D && options.es) + type.vecsize++; + farg_str += ", "; + farg_str += type_to_glsl_constructor(type); + farg_str += "("; + + if (imgtype.image.dim == Dim1D && options.es) + { + if (imgtype.image.arrayed) + { + farg_str += enclose_expression(coord_expr) + ".x"; + farg_str += ", 0.0, "; + farg_str += enclose_expression(coord_expr) + ".y"; + } + else + { + farg_str += coord_expr; + farg_str += ", 0.0"; + } + } + else + farg_str += coord_expr; + + farg_str += ", "; + farg_str += to_expression(args.dref); + farg_str += ")"; + } + } + else + { + if (imgtype.image.dim == Dim1D && options.es) + { + // Have to fake a second coordinate. + if (type_is_floating_point(coord_type)) + { + // Cannot mix proj and array. + if (imgtype.image.arrayed || args.base.is_proj) + { + coord_expr = join("vec3(", enclose_expression(coord_expr), ".x, 0.0, ", + enclose_expression(coord_expr), ".y)"); + } + else + coord_expr = join("vec2(", coord_expr, ", 0.0)"); + } + else + { + if (imgtype.image.arrayed) + { + coord_expr = join("ivec3(", enclose_expression(coord_expr), + ".x, 0, ", + enclose_expression(coord_expr), ".y)"); + } + else + coord_expr = join("ivec2(", coord_expr, ", 0)"); + } + } + + farg_str += ", "; + farg_str += coord_expr; + } + + if (args.grad_x || args.grad_y) + { + forward = forward && should_forward(args.grad_x); + forward = forward && should_forward(args.grad_y); + farg_str += ", "; + farg_str += to_expression(args.grad_x); + farg_str += ", "; + farg_str += to_expression(args.grad_y); + } + + if (args.lod) + { + if (workaround_lod_array_shadow_as_grad) + { + // Implement textureGrad() instead. LOD == 0.0 is implemented as gradient of 0.0. + // Implementing this as plain texture() is not safe on some implementations. + if (imgtype.image.dim == Dim2D) + farg_str += ", vec2(0.0), vec2(0.0)"; + else if (imgtype.image.dim == DimCube) + farg_str += ", vec3(0.0), vec3(0.0)"; + } + else + { + forward = forward && should_forward(args.lod); + farg_str += ", "; + + // Lod expression for TexelFetch in GLSL must be int, and only int. + if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) + farg_str += bitcast_expression(SPIRType::Int, args.lod); + else + farg_str += to_expression(args.lod); + } + } + else if (args.base.is_fetch && imgtype.image.dim != DimBuffer && !imgtype.image.ms) + { + // Lod argument is optional in OpImageFetch, but we require a LOD value, pick 0 as the default. + farg_str += ", 0"; + } + + if (args.offset) + { + forward = forward && should_forward(args.offset); + farg_str += ", "; + farg_str += bitcast_expression(SPIRType::Int, args.offset); + } + + if (args.sample) + { + farg_str += ", "; + farg_str += bitcast_expression(SPIRType::Int, args.sample); + } + + if (args.min_lod) + { + farg_str += ", "; + farg_str += to_expression(args.min_lod); + } + + if (args.sparse_texel) + { + // Sparse texel output parameter comes after everything else, except it's before the optional, component/bias arguments. + farg_str += ", "; + farg_str += to_expression(args.sparse_texel); + } + + if (args.bias) + { + forward = forward && should_forward(args.bias); + farg_str += ", "; + farg_str += to_expression(args.bias); + } + + if (args.component && !expression_is_constant_null(args.component)) + { + forward = forward && should_forward(args.component); + farg_str += ", "; + farg_str += bitcast_expression(SPIRType::Int, args.component); + } + + *p_forward = forward; + + return farg_str; +} + +Op CompilerGLSL::get_remapped_spirv_op(Op op) const +{ + if (options.relax_nan_checks) + { + switch (op) + { + case OpFUnordLessThan: + op = OpFOrdLessThan; + break; + case OpFUnordLessThanEqual: + op = OpFOrdLessThanEqual; + break; + case OpFUnordGreaterThan: + op = OpFOrdGreaterThan; + break; + case OpFUnordGreaterThanEqual: + op = OpFOrdGreaterThanEqual; + break; + case OpFUnordEqual: + op = OpFOrdEqual; + break; + case OpFOrdNotEqual: + op = OpFUnordNotEqual; + break; + + default: + break; + } + } + + return op; +} + +GLSLstd450 CompilerGLSL::get_remapped_glsl_op(GLSLstd450 std450_op) const +{ + // Relax to non-NaN aware opcodes. + if (options.relax_nan_checks) + { + switch (std450_op) + { + case GLSLstd450NClamp: + std450_op = GLSLstd450FClamp; + break; + case GLSLstd450NMin: + std450_op = GLSLstd450FMin; + break; + case GLSLstd450NMax: + std450_op = GLSLstd450FMax; + break; + default: + break; + } + } + + return std450_op; +} + +void CompilerGLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, uint32_t length) +{ + auto op = static_cast<GLSLstd450>(eop); + + if (is_legacy() && is_unsigned_glsl_opcode(op)) + SPIRV_CROSS_THROW("Unsigned integers are not supported on legacy GLSL targets."); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_glsl_instruction(op, args, length); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + op = get_remapped_glsl_op(op); + + switch (op) + { + // FP fiddling + case GLSLstd450Round: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "round"); + else + { + auto op0 = to_enclosed_expression(args[0]); + auto &op0_type = expression_type(args[0]); + auto expr = join("floor(", op0, " + ", type_to_glsl_constructor(op0_type), "(0.5))"); + bool forward = should_forward(args[0]); + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); + } + break; + + case GLSLstd450RoundEven: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "roundEven"); + else if (!options.es) + { + // This extension provides round() with round-to-even semantics. + require_extension_internal("GL_EXT_gpu_shader4"); + emit_unary_func_op(result_type, id, args[0], "round"); + } + else + SPIRV_CROSS_THROW("roundEven supported only in ESSL 300."); + break; + + case GLSLstd450Trunc: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "trunc"); + else + { + // Implement by value-casting to int and back. + bool forward = should_forward(args[0]); + auto op0 = to_unpacked_expression(args[0]); + auto &op0_type = expression_type(args[0]); + auto via_type = op0_type; + via_type.basetype = SPIRType::Int; + auto expr = join(type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(", op0, "))"); + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); + } + break; + + case GLSLstd450SAbs: + emit_unary_func_op_cast(result_type, id, args[0], "abs", int_type, int_type); + break; + case GLSLstd450FAbs: + emit_unary_func_op(result_type, id, args[0], "abs"); + break; + case GLSLstd450SSign: + emit_unary_func_op_cast(result_type, id, args[0], "sign", int_type, int_type); + break; + case GLSLstd450FSign: + emit_unary_func_op(result_type, id, args[0], "sign"); + break; + case GLSLstd450Floor: + emit_unary_func_op(result_type, id, args[0], "floor"); + break; + case GLSLstd450Ceil: + emit_unary_func_op(result_type, id, args[0], "ceil"); + break; + case GLSLstd450Fract: + emit_unary_func_op(result_type, id, args[0], "fract"); + break; + case GLSLstd450Radians: + emit_unary_func_op(result_type, id, args[0], "radians"); + break; + case GLSLstd450Degrees: + emit_unary_func_op(result_type, id, args[0], "degrees"); + break; + case GLSLstd450Fma: + if ((!options.es && options.version < 400) || (options.es && options.version < 320)) + { + auto expr = join(to_enclosed_expression(args[0]), " * ", to_enclosed_expression(args[1]), " + ", + to_enclosed_expression(args[2])); + + emit_op(result_type, id, expr, + should_forward(args[0]) && should_forward(args[1]) && should_forward(args[2])); + for (uint32_t i = 0; i < 3; i++) + inherit_expression_dependencies(id, args[i]); + } + else + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "fma"); + break; + + case GLSLstd450Modf: + register_call_out_argument(args[1]); + if (!is_legacy()) + { + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, args[0], args[1], "modf"); + } + else + { + //NB. legacy GLSL doesn't have trunc() either, so we do a value cast + auto &op1_type = expression_type(args[1]); + auto via_type = op1_type; + via_type.basetype = SPIRType::Int; + statement(to_expression(args[1]), " = ", + type_to_glsl(op1_type), "(", type_to_glsl(via_type), + "(", to_expression(args[0]), "));"); + emit_binary_op(result_type, id, args[0], args[1], "-"); + } + break; + + case GLSLstd450ModfStruct: + { + auto &type = get<SPIRType>(result_type); + emit_uninitialized_temporary_expression(result_type, id); + if (!is_legacy()) + { + statement(to_expression(id), ".", to_member_name(type, 0), " = ", "modf(", to_expression(args[0]), ", ", + to_expression(id), ".", to_member_name(type, 1), ");"); + } + else + { + //NB. legacy GLSL doesn't have trunc() either, so we do a value cast + auto &op0_type = expression_type(args[0]); + auto via_type = op0_type; + via_type.basetype = SPIRType::Int; + statement(to_expression(id), ".", to_member_name(type, 1), " = ", type_to_glsl(op0_type), + "(", type_to_glsl(via_type), "(", to_expression(args[0]), "));"); + statement(to_expression(id), ".", to_member_name(type, 0), " = ", to_enclosed_expression(args[0]), " - ", + to_expression(id), ".", to_member_name(type, 1), ";"); + } + break; + } + + // Minmax + case GLSLstd450UMin: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", uint_type, false); + break; + + case GLSLstd450SMin: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "min", int_type, false); + break; + + case GLSLstd450FMin: + emit_binary_func_op(result_type, id, args[0], args[1], "min"); + break; + + case GLSLstd450FMax: + emit_binary_func_op(result_type, id, args[0], args[1], "max"); + break; + + case GLSLstd450UMax: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", uint_type, false); + break; + + case GLSLstd450SMax: + emit_binary_func_op_cast(result_type, id, args[0], args[1], "max", int_type, false); + break; + + case GLSLstd450FClamp: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "clamp"); + break; + + case GLSLstd450UClamp: + emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", uint_type); + break; + + case GLSLstd450SClamp: + emit_trinary_func_op_cast(result_type, id, args[0], args[1], args[2], "clamp", int_type); + break; + + // Trig + case GLSLstd450Sin: + emit_unary_func_op(result_type, id, args[0], "sin"); + break; + case GLSLstd450Cos: + emit_unary_func_op(result_type, id, args[0], "cos"); + break; + case GLSLstd450Tan: + emit_unary_func_op(result_type, id, args[0], "tan"); + break; + case GLSLstd450Asin: + emit_unary_func_op(result_type, id, args[0], "asin"); + break; + case GLSLstd450Acos: + emit_unary_func_op(result_type, id, args[0], "acos"); + break; + case GLSLstd450Atan: + emit_unary_func_op(result_type, id, args[0], "atan"); + break; + case GLSLstd450Sinh: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "sinh"); + else + { + bool forward = should_forward(args[0]); + auto expr = join("(exp(", to_expression(args[0]), ") - exp(-", to_enclosed_expression(args[0]), ")) * 0.5"); + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); + } + break; + case GLSLstd450Cosh: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "cosh"); + else + { + bool forward = should_forward(args[0]); + auto expr = join("(exp(", to_expression(args[0]), ") + exp(-", to_enclosed_expression(args[0]), ")) * 0.5"); + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); + } + break; + case GLSLstd450Tanh: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "tanh"); + else + { + // Create temporaries to store the result of exp(arg) and exp(-arg). + uint32_t &ids = extra_sub_expressions[id]; + if (!ids) + { + ids = ir.increase_bound_by(2); + + // Inherit precision qualifier (legacy has no NoContraction). + if (has_decoration(id, DecorationRelaxedPrecision)) + { + set_decoration(ids, DecorationRelaxedPrecision); + set_decoration(ids + 1, DecorationRelaxedPrecision); + } + } + uint32_t epos_id = ids; + uint32_t eneg_id = ids + 1; + + emit_op(result_type, epos_id, join("exp(", to_expression(args[0]), ")"), false); + emit_op(result_type, eneg_id, join("exp(-", to_enclosed_expression(args[0]), ")"), false); + inherit_expression_dependencies(epos_id, args[0]); + inherit_expression_dependencies(eneg_id, args[0]); + + auto expr = join("(", to_enclosed_expression(epos_id), " - ", to_enclosed_expression(eneg_id), ") / " + "(", to_enclosed_expression(epos_id), " + ", to_enclosed_expression(eneg_id), ")"); + emit_op(result_type, id, expr, true); + inherit_expression_dependencies(id, epos_id); + inherit_expression_dependencies(id, eneg_id); + } + break; + case GLSLstd450Asinh: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "asinh"); + else + emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Asinh); + break; + case GLSLstd450Acosh: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "acosh"); + else + emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Acosh); + break; + case GLSLstd450Atanh: + if (!is_legacy()) + emit_unary_func_op(result_type, id, args[0], "atanh"); + else + emit_emulated_ahyper_op(result_type, id, args[0], GLSLstd450Atanh); + break; + case GLSLstd450Atan2: + emit_binary_func_op(result_type, id, args[0], args[1], "atan"); + break; + + // Exponentials + case GLSLstd450Pow: + emit_binary_func_op(result_type, id, args[0], args[1], "pow"); + break; + case GLSLstd450Exp: + emit_unary_func_op(result_type, id, args[0], "exp"); + break; + case GLSLstd450Log: + emit_unary_func_op(result_type, id, args[0], "log"); + break; + case GLSLstd450Exp2: + emit_unary_func_op(result_type, id, args[0], "exp2"); + break; + case GLSLstd450Log2: + emit_unary_func_op(result_type, id, args[0], "log2"); + break; + case GLSLstd450Sqrt: + emit_unary_func_op(result_type, id, args[0], "sqrt"); + break; + case GLSLstd450InverseSqrt: + emit_unary_func_op(result_type, id, args[0], "inversesqrt"); + break; + + // Matrix math + case GLSLstd450Determinant: + { + // No need to transpose - it doesn't affect the determinant + auto *e = maybe_get<SPIRExpression>(args[0]); + bool old_transpose = e && e->need_transpose; + if (old_transpose) + e->need_transpose = false; + + if (options.version < 150) // also matches ES 100 + { + auto &type = expression_type(args[0]); + assert(type.vecsize >= 2 && type.vecsize <= 4); + assert(type.vecsize == type.columns); + + // ARB_gpu_shader_fp64 needs GLSL 150, other types are not valid + if (type.basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Unsupported type for matrix determinant"); + + bool relaxed = has_decoration(id, DecorationRelaxedPrecision); + require_polyfill(static_cast<Polyfill>(PolyfillDeterminant2x2 << (type.vecsize - 2)), + relaxed); + emit_unary_func_op(result_type, id, args[0], + (options.es && relaxed) ? "spvDeterminantMP" : "spvDeterminant"); + } + else + emit_unary_func_op(result_type, id, args[0], "determinant"); + + if (old_transpose) + e->need_transpose = true; + break; + } + + case GLSLstd450MatrixInverse: + { + // The inverse of the transpose is the same as the transpose of + // the inverse, so we can just flip need_transpose of the result. + auto *a = maybe_get<SPIRExpression>(args[0]); + bool old_transpose = a && a->need_transpose; + if (old_transpose) + a->need_transpose = false; + + const char *func = "inverse"; + if (options.version < 140) // also matches ES 100 + { + auto &type = get<SPIRType>(result_type); + assert(type.vecsize >= 2 && type.vecsize <= 4); + assert(type.vecsize == type.columns); + + // ARB_gpu_shader_fp64 needs GLSL 150, other types are invalid + if (type.basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Unsupported type for matrix inverse"); + + bool relaxed = has_decoration(id, DecorationRelaxedPrecision); + require_polyfill(static_cast<Polyfill>(PolyfillMatrixInverse2x2 << (type.vecsize - 2)), + relaxed); + func = (options.es && relaxed) ? "spvInverseMP" : "spvInverse"; + } + + bool forward = should_forward(args[0]); + auto &e = emit_op(result_type, id, join(func, "(", to_unpacked_expression(args[0]), ")"), forward); + inherit_expression_dependencies(id, args[0]); + + if (old_transpose) + { + e.need_transpose = true; + a->need_transpose = true; + } + break; + } + + // Lerping + case GLSLstd450FMix: + case GLSLstd450IMix: + { + emit_mix_op(result_type, id, args[0], args[1], args[2]); + break; + } + case GLSLstd450Step: + emit_binary_func_op(result_type, id, args[0], args[1], "step"); + break; + case GLSLstd450SmoothStep: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "smoothstep"); + break; + + // Packing + case GLSLstd450Frexp: + register_call_out_argument(args[1]); + forced_temporaries.insert(id); + emit_binary_func_op(result_type, id, args[0], args[1], "frexp"); + break; + + case GLSLstd450FrexpStruct: + { + auto &type = get<SPIRType>(result_type); + emit_uninitialized_temporary_expression(result_type, id); + statement(to_expression(id), ".", to_member_name(type, 0), " = ", "frexp(", to_expression(args[0]), ", ", + to_expression(id), ".", to_member_name(type, 1), ");"); + break; + } + + case GLSLstd450Ldexp: + { + bool forward = should_forward(args[0]) && should_forward(args[1]); + + auto op0 = to_unpacked_expression(args[0]); + auto op1 = to_unpacked_expression(args[1]); + auto &op1_type = expression_type(args[1]); + if (op1_type.basetype != SPIRType::Int) + { + // Need a value cast here. + auto target_type = op1_type; + target_type.basetype = SPIRType::Int; + op1 = join(type_to_glsl_constructor(target_type), "(", op1, ")"); + } + + auto expr = join("ldexp(", op0, ", ", op1, ")"); + + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, args[0]); + inherit_expression_dependencies(id, args[1]); + break; + } + + case GLSLstd450PackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packSnorm4x8"); + break; + case GLSLstd450PackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "packUnorm4x8"); + break; + case GLSLstd450PackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packSnorm2x16"); + break; + case GLSLstd450PackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "packUnorm2x16"); + break; + case GLSLstd450PackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "packHalf2x16"); + break; + case GLSLstd450UnpackSnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm4x8"); + break; + case GLSLstd450UnpackUnorm4x8: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm4x8"); + break; + case GLSLstd450UnpackSnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackSnorm2x16"); + break; + case GLSLstd450UnpackUnorm2x16: + emit_unary_func_op(result_type, id, args[0], "unpackUnorm2x16"); + break; + case GLSLstd450UnpackHalf2x16: + emit_unary_func_op(result_type, id, args[0], "unpackHalf2x16"); + break; + + case GLSLstd450PackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "packDouble2x32"); + break; + case GLSLstd450UnpackDouble2x32: + emit_unary_func_op(result_type, id, args[0], "unpackDouble2x32"); + break; + + // Vector math + case GLSLstd450Length: + emit_unary_func_op(result_type, id, args[0], "length"); + break; + case GLSLstd450Distance: + emit_binary_func_op(result_type, id, args[0], args[1], "distance"); + break; + case GLSLstd450Cross: + emit_binary_func_op(result_type, id, args[0], args[1], "cross"); + break; + case GLSLstd450Normalize: + emit_unary_func_op(result_type, id, args[0], "normalize"); + break; + case GLSLstd450FaceForward: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "faceforward"); + break; + case GLSLstd450Reflect: + emit_binary_func_op(result_type, id, args[0], args[1], "reflect"); + break; + case GLSLstd450Refract: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "refract"); + break; + + // Bit-fiddling + case GLSLstd450FindILsb: + // findLSB always returns int. + emit_unary_func_op_cast(result_type, id, args[0], "findLSB", expression_type(args[0]).basetype, int_type); + break; + + case GLSLstd450FindSMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findMSB", int_type, int_type); + break; + + case GLSLstd450FindUMsb: + emit_unary_func_op_cast(result_type, id, args[0], "findMSB", uint_type, + int_type); // findMSB always returns int. + break; + + // Multisampled varying + case GLSLstd450InterpolateAtCentroid: + emit_unary_func_op(result_type, id, args[0], "interpolateAtCentroid"); + break; + case GLSLstd450InterpolateAtSample: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtSample"); + break; + case GLSLstd450InterpolateAtOffset: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtOffset"); + break; + + case GLSLstd450NMin: + case GLSLstd450NMax: + { + if (options.vulkan_semantics) + { + require_extension_internal("GL_EXT_spirv_intrinsics"); + bool relaxed = has_decoration(id, DecorationRelaxedPrecision); + Polyfill poly = {}; + switch (get<SPIRType>(result_type).width) + { + case 16: + poly = op == GLSLstd450NMin ? PolyfillNMin16 : PolyfillNMax16; + break; + + case 32: + poly = op == GLSLstd450NMin ? PolyfillNMin32 : PolyfillNMax32; + break; + + case 64: + poly = op == GLSLstd450NMin ? PolyfillNMin64 : PolyfillNMax64; + break; + + default: + SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax."); + } + + require_polyfill(poly, relaxed); + + // Function return decorations are broken, so need to do double polyfill. + if (relaxed) + require_polyfill(poly, false); + + const char *op_str; + if (relaxed) + op_str = op == GLSLstd450NMin ? "spvNMinRelaxed" : "spvNMaxRelaxed"; + else + op_str = op == GLSLstd450NMin ? "spvNMin" : "spvNMax"; + + emit_binary_func_op(result_type, id, args[0], args[1], op_str); + } + else + { + emit_nminmax_op(result_type, id, args[0], args[1], op); + } + break; + } + + case GLSLstd450NClamp: + { + if (options.vulkan_semantics) + { + require_extension_internal("GL_EXT_spirv_intrinsics"); + bool relaxed = has_decoration(id, DecorationRelaxedPrecision); + Polyfill poly = {}; + switch (get<SPIRType>(result_type).width) + { + case 16: + poly = PolyfillNClamp16; + break; + + case 32: + poly = PolyfillNClamp32; + break; + + case 64: + poly = PolyfillNClamp64; + break; + + default: + SPIRV_CROSS_THROW("Invalid bit width for NMin/NMax."); + } + + require_polyfill(poly, relaxed); + + // Function return decorations are broken, so need to do double polyfill. + if (relaxed) + require_polyfill(poly, false); + + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], relaxed ? "spvNClampRelaxed" : "spvNClamp"); + } + else + { + // Make sure we have a unique ID here to avoid aliasing the extra sub-expressions between clamp and NMin sub-op. + // IDs cannot exceed 24 bits, so we can make use of the higher bits for some unique flags. + uint32_t &max_id = extra_sub_expressions[id | EXTRA_SUB_EXPRESSION_TYPE_AUX]; + if (!max_id) + max_id = ir.increase_bound_by(1); + + // Inherit precision qualifiers. + ir.meta[max_id] = ir.meta[id]; + + emit_nminmax_op(result_type, max_id, args[0], args[1], GLSLstd450NMax); + emit_nminmax_op(result_type, id, max_id, args[2], GLSLstd450NMin); + } + break; + } + + default: + statement("// unimplemented GLSL op ", eop); + break; + } +} + +void CompilerGLSL::emit_nminmax_op(uint32_t result_type, uint32_t id, uint32_t op0, uint32_t op1, GLSLstd450 op) +{ + // Need to emulate this call. + uint32_t &ids = extra_sub_expressions[id]; + if (!ids) + { + ids = ir.increase_bound_by(5); + auto btype = get<SPIRType>(result_type); + btype.basetype = SPIRType::Boolean; + set<SPIRType>(ids, btype); + } + + uint32_t btype_id = ids + 0; + uint32_t left_nan_id = ids + 1; + uint32_t right_nan_id = ids + 2; + uint32_t tmp_id = ids + 3; + uint32_t mixed_first_id = ids + 4; + + // Inherit precision qualifiers. + ir.meta[tmp_id] = ir.meta[id]; + ir.meta[mixed_first_id] = ir.meta[id]; + + if (!is_legacy()) + { + emit_unary_func_op(btype_id, left_nan_id, op0, "isnan"); + emit_unary_func_op(btype_id, right_nan_id, op1, "isnan"); + } + else if (expression_type(op0).vecsize > 1) + { + // If the number doesn't equal itself, it must be NaN + emit_binary_func_op(btype_id, left_nan_id, op0, op0, "notEqual"); + emit_binary_func_op(btype_id, right_nan_id, op1, op1, "notEqual"); + } + else + { + emit_binary_op(btype_id, left_nan_id, op0, op0, "!="); + emit_binary_op(btype_id, right_nan_id, op1, op1, "!="); + } + emit_binary_func_op(result_type, tmp_id, op0, op1, op == GLSLstd450NMin ? "min" : "max"); + emit_mix_op(result_type, mixed_first_id, tmp_id, op1, left_nan_id); + emit_mix_op(result_type, id, mixed_first_id, op0, right_nan_id); +} + +void CompilerGLSL::emit_emulated_ahyper_op(uint32_t result_type, uint32_t id, uint32_t op0, GLSLstd450 op) +{ + const char *one = backend.float_literal_suffix ? "1.0f" : "1.0"; + std::string expr; + bool forward = should_forward(op0); + + switch (op) + { + case GLSLstd450Asinh: + expr = join("log(", to_enclosed_expression(op0), " + sqrt(", + to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " + ", one, "))"); + emit_op(result_type, id, expr, forward); + break; + + case GLSLstd450Acosh: + expr = join("log(", to_enclosed_expression(op0), " + sqrt(", + to_enclosed_expression(op0), " * ", to_enclosed_expression(op0), " - ", one, "))"); + break; + + case GLSLstd450Atanh: + expr = join("log((", one, " + ", to_enclosed_expression(op0), ") / " + "(", one, " - ", to_enclosed_expression(op0), ")) * 0.5", + backend.float_literal_suffix ? "f" : ""); + break; + + default: + SPIRV_CROSS_THROW("Invalid op."); + } + + emit_op(result_type, id, expr, forward); + inherit_expression_dependencies(id, op0); +} + +void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, + uint32_t) +{ + require_extension_internal("GL_AMD_shader_ballot"); + + enum AMDShaderBallot + { + SwizzleInvocationsAMD = 1, + SwizzleInvocationsMaskedAMD = 2, + WriteInvocationAMD = 3, + MbcntAMD = 4 + }; + + auto op = static_cast<AMDShaderBallot>(eop); + + switch (op) + { + case SwizzleInvocationsAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD"); + register_control_dependent_expression(id); + break; + + case SwizzleInvocationsMaskedAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD"); + register_control_dependent_expression(id); + break; + + case WriteInvocationAMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD"); + register_control_dependent_expression(id); + break; + + case MbcntAMD: + emit_unary_func_op(result_type, id, args[0], "mbcntAMD"); + register_control_dependent_expression(id); + break; + + default: + statement("// unimplemented SPV AMD shader ballot op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_shader_explicit_vertex_parameter_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t) +{ + require_extension_internal("GL_AMD_shader_explicit_vertex_parameter"); + + enum AMDShaderExplicitVertexParameter + { + InterpolateAtVertexAMD = 1 + }; + + auto op = static_cast<AMDShaderExplicitVertexParameter>(eop); + + switch (op) + { + case InterpolateAtVertexAMD: + emit_binary_func_op(result_type, id, args[0], args[1], "interpolateAtVertexAMD"); + break; + + default: + statement("// unimplemented SPV AMD shader explicit vertex parameter op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_shader_trinary_minmax_op(uint32_t result_type, uint32_t id, uint32_t eop, + const uint32_t *args, uint32_t) +{ + require_extension_internal("GL_AMD_shader_trinary_minmax"); + + enum AMDShaderTrinaryMinMax + { + FMin3AMD = 1, + UMin3AMD = 2, + SMin3AMD = 3, + FMax3AMD = 4, + UMax3AMD = 5, + SMax3AMD = 6, + FMid3AMD = 7, + UMid3AMD = 8, + SMid3AMD = 9 + }; + + auto op = static_cast<AMDShaderTrinaryMinMax>(eop); + + switch (op) + { + case FMin3AMD: + case UMin3AMD: + case SMin3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "min3"); + break; + + case FMax3AMD: + case UMax3AMD: + case SMax3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "max3"); + break; + + case FMid3AMD: + case UMid3AMD: + case SMid3AMD: + emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "mid3"); + break; + + default: + statement("// unimplemented SPV AMD shader trinary minmax op ", eop); + break; + } +} + +void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id, uint32_t eop, const uint32_t *args, + uint32_t) +{ + require_extension_internal("GL_AMD_gcn_shader"); + + enum AMDGCNShader + { + CubeFaceIndexAMD = 1, + CubeFaceCoordAMD = 2, + TimeAMD = 3 + }; + + auto op = static_cast<AMDGCNShader>(eop); + + switch (op) + { + case CubeFaceIndexAMD: + emit_unary_func_op(result_type, id, args[0], "cubeFaceIndexAMD"); + break; + case CubeFaceCoordAMD: + emit_unary_func_op(result_type, id, args[0], "cubeFaceCoordAMD"); + break; + case TimeAMD: + { + string expr = "timeAMD()"; + emit_op(result_type, id, expr, true); + register_control_dependent_expression(id); + break; + } + + default: + statement("// unimplemented SPV AMD gcn shader op ", eop); + break; + } +} + +void CompilerGLSL::emit_subgroup_op(const Instruction &i) +{ + const uint32_t *ops = stream(i); + auto op = static_cast<Op>(i.op); + + if (!options.vulkan_semantics && !is_supported_subgroup_op_in_opengl(op, ops)) + SPIRV_CROSS_THROW("This subgroup operation is only supported in Vulkan semantics."); + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(i); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + switch (op) + { + case OpGroupNonUniformElect: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupElect); + break; + + case OpGroupNonUniformBallotBitCount: + { + const GroupOperation operation = static_cast<GroupOperation>(ops[3]); + if (operation == GroupOperationReduce) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitCount); + else if (operation == GroupOperationInclusiveScan || operation == GroupOperationExclusiveScan) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); + } + break; + + case OpGroupNonUniformBallotBitExtract: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotBitExtract); + break; + + case OpGroupNonUniformInverseBallot: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInverseBallot_InclBitCount_ExclBitCout); + break; + + case OpGroupNonUniformBallot: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallot); + break; + + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBallotFindLSB_MSB); + break; + + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBroadcast_First); + break; + + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + require_extension_internal("GL_KHR_shader_subgroup_shuffle"); + break; + + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + require_extension_internal("GL_KHR_shader_subgroup_shuffle_relative"); + break; + + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + { + const SPIRType &type = expression_type(ops[3]); + if (type.basetype == SPIRType::BaseType::Boolean && type.vecsize == 1u) + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAll_Any_AllEqualBool); + else + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupAllEqualT); + } + break; + + // clang-format off +#define GLSL_GROUP_OP(OP)\ + case OpGroupNonUniform##OP:\ + {\ + auto operation = static_cast<GroupOperation>(ops[3]);\ + if (operation == GroupOperationClusteredReduce)\ + require_extension_internal("GL_KHR_shader_subgroup_clustered");\ + else if (operation == GroupOperationReduce)\ + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##Reduce);\ + else if (operation == GroupOperationExclusiveScan)\ + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##ExclusiveScan);\ + else if (operation == GroupOperationInclusiveScan)\ + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupArithmetic##OP##InclusiveScan);\ + else\ + SPIRV_CROSS_THROW("Invalid group operation.");\ + break;\ + } + + GLSL_GROUP_OP(IAdd) + GLSL_GROUP_OP(FAdd) + GLSL_GROUP_OP(IMul) + GLSL_GROUP_OP(FMul) + +#undef GLSL_GROUP_OP + // clang-format on + + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformLogicalAnd: + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + { + auto operation = static_cast<GroupOperation>(ops[3]); + if (operation == GroupOperationClusteredReduce) + { + require_extension_internal("GL_KHR_shader_subgroup_clustered"); + } + else if (operation == GroupOperationExclusiveScan || operation == GroupOperationInclusiveScan || + operation == GroupOperationReduce) + { + require_extension_internal("GL_KHR_shader_subgroup_arithmetic"); + } + else + SPIRV_CROSS_THROW("Invalid group operation."); + break; + } + + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + require_extension_internal("GL_KHR_shader_subgroup_quad"); + break; + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2])); + if (scope != ScopeSubgroup) + SPIRV_CROSS_THROW("Only subgroup scope is supported."); + + switch (op) + { + case OpGroupNonUniformElect: + emit_op(result_type, id, "subgroupElect()", true); + break; + + case OpGroupNonUniformBroadcast: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBroadcast"); + break; + + case OpGroupNonUniformBroadcastFirst: + emit_unary_func_op(result_type, id, ops[3], "subgroupBroadcastFirst"); + break; + + case OpGroupNonUniformBallot: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallot"); + break; + + case OpGroupNonUniformInverseBallot: + emit_unary_func_op(result_type, id, ops[3], "subgroupInverseBallot"); + break; + + case OpGroupNonUniformBallotBitExtract: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupBallotBitExtract"); + break; + + case OpGroupNonUniformBallotFindLSB: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindLSB"); + break; + + case OpGroupNonUniformBallotFindMSB: + emit_unary_func_op(result_type, id, ops[3], "subgroupBallotFindMSB"); + break; + + case OpGroupNonUniformBallotBitCount: + { + auto operation = static_cast<GroupOperation>(ops[3]); + if (operation == GroupOperationReduce) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotBitCount"); + else if (operation == GroupOperationInclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotInclusiveBitCount"); + else if (operation == GroupOperationExclusiveScan) + emit_unary_func_op(result_type, id, ops[4], "subgroupBallotExclusiveBitCount"); + else + SPIRV_CROSS_THROW("Invalid BitCount operation."); + break; + } + + case OpGroupNonUniformShuffle: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffle"); + break; + + case OpGroupNonUniformShuffleXor: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleXor"); + break; + + case OpGroupNonUniformShuffleUp: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleUp"); + break; + + case OpGroupNonUniformShuffleDown: + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupShuffleDown"); + break; + + case OpGroupNonUniformAll: + emit_unary_func_op(result_type, id, ops[3], "subgroupAll"); + break; + + case OpGroupNonUniformAny: + emit_unary_func_op(result_type, id, ops[3], "subgroupAny"); + break; + + case OpGroupNonUniformAllEqual: + emit_unary_func_op(result_type, id, ops[3], "subgroupAllEqual"); + break; + + // clang-format off +#define GLSL_GROUP_OP(op, glsl_op) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast<GroupOperation>(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op(result_type, id, ops[4], "subgroup" #glsl_op); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "subgroupInclusive" #glsl_op); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op(result_type, id, ops[4], "subgroupExclusive" #glsl_op); \ + else if (operation == GroupOperationClusteredReduce) \ + emit_binary_func_op(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + +#define GLSL_GROUP_OP_CAST(op, glsl_op, type) \ +case OpGroupNonUniform##op: \ + { \ + auto operation = static_cast<GroupOperation>(ops[3]); \ + if (operation == GroupOperationReduce) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroup" #glsl_op, type, type); \ + else if (operation == GroupOperationInclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroupInclusive" #glsl_op, type, type); \ + else if (operation == GroupOperationExclusiveScan) \ + emit_unary_func_op_cast(result_type, id, ops[4], "subgroupExclusive" #glsl_op, type, type); \ + else if (operation == GroupOperationClusteredReduce) \ + emit_binary_func_op_cast_clustered(result_type, id, ops[4], ops[5], "subgroupClustered" #glsl_op, type); \ + else \ + SPIRV_CROSS_THROW("Invalid group operation."); \ + break; \ + } + + GLSL_GROUP_OP(FAdd, Add) + GLSL_GROUP_OP(FMul, Mul) + GLSL_GROUP_OP(FMin, Min) + GLSL_GROUP_OP(FMax, Max) + GLSL_GROUP_OP(IAdd, Add) + GLSL_GROUP_OP(IMul, Mul) + GLSL_GROUP_OP_CAST(SMin, Min, int_type) + GLSL_GROUP_OP_CAST(SMax, Max, int_type) + GLSL_GROUP_OP_CAST(UMin, Min, uint_type) + GLSL_GROUP_OP_CAST(UMax, Max, uint_type) + GLSL_GROUP_OP(BitwiseAnd, And) + GLSL_GROUP_OP(BitwiseOr, Or) + GLSL_GROUP_OP(BitwiseXor, Xor) + GLSL_GROUP_OP(LogicalAnd, And) + GLSL_GROUP_OP(LogicalOr, Or) + GLSL_GROUP_OP(LogicalXor, Xor) +#undef GLSL_GROUP_OP +#undef GLSL_GROUP_OP_CAST + // clang-format on + + case OpGroupNonUniformQuadSwap: + { + uint32_t direction = evaluate_constant_u32(ops[4]); + if (direction == 0) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapHorizontal"); + else if (direction == 1) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapVertical"); + else if (direction == 2) + emit_unary_func_op(result_type, id, ops[3], "subgroupQuadSwapDiagonal"); + else + SPIRV_CROSS_THROW("Invalid quad swap direction."); + break; + } + + case OpGroupNonUniformQuadBroadcast: + { + emit_binary_func_op(result_type, id, ops[3], ops[4], "subgroupQuadBroadcast"); + break; + } + + default: + SPIRV_CROSS_THROW("Invalid opcode for subgroup."); + } + + register_control_dependent_expression(id); +} + +string CompilerGLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &in_type) +{ + // OpBitcast can deal with pointers. + if (out_type.pointer || in_type.pointer) + { + if (out_type.vecsize == 2 || in_type.vecsize == 2) + require_extension_internal("GL_EXT_buffer_reference_uvec2"); + return type_to_glsl(out_type); + } + + if (out_type.basetype == in_type.basetype) + return ""; + + assert(out_type.basetype != SPIRType::Boolean); + assert(in_type.basetype != SPIRType::Boolean); + + bool integral_cast = type_is_integral(out_type) && type_is_integral(in_type); + bool same_size_cast = out_type.width == in_type.width; + + // Trivial bitcast case, casts between integers. + if (integral_cast && same_size_cast) + return type_to_glsl(out_type); + + // Catch-all 8-bit arithmetic casts (GL_EXT_shader_explicit_arithmetic_types). + if (out_type.width == 8 && in_type.width >= 16 && integral_cast && in_type.vecsize == 1) + return "unpack8"; + else if (in_type.width == 8 && out_type.width == 16 && integral_cast && out_type.vecsize == 1) + return "pack16"; + else if (in_type.width == 8 && out_type.width == 32 && integral_cast && out_type.vecsize == 1) + return "pack32"; + + // Floating <-> Integer special casts. Just have to enumerate all cases. :( + // 16-bit, 32-bit and 64-bit floats. + if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Float) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Float -> Uint bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "floatBitsToUint"; + } + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Float) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Float -> Int bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "floatBitsToInt"; + } + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::UInt) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Uint -> Float bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "uintBitsToFloat"; + } + else if (out_type.basetype == SPIRType::Float && in_type.basetype == SPIRType::Int) + { + if (is_legacy_es()) + SPIRV_CROSS_THROW("Int -> Float bitcast not supported on legacy ESSL."); + else if (!options.es && options.version < 330) + require_extension_internal("GL_ARB_shader_bit_encoding"); + return "intBitsToFloat"; + } + + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Double) + return "doubleBitsToInt64"; + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::Double) + return "doubleBitsToUint64"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::Int64) + return "int64BitsToDouble"; + else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) + return "uint64BitsToDouble"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Half) + return "float16BitsToInt16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::Half) + return "float16BitsToUint16"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::Short) + return "int16BitsToFloat16"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UShort) + return "uint16BitsToFloat16"; + + // And finally, some even more special purpose casts. + if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UInt && in_type.vecsize == 2) + return "packUint2x32"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UInt64 && out_type.vecsize == 2) + return "unpackUint2x32"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + return "unpackFloat2x16"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) + return "packFloat2x16"; + else if (out_type.basetype == SPIRType::Int && in_type.basetype == SPIRType::Short && in_type.vecsize == 2) + return "packInt2x16"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int && in_type.vecsize == 1) + return "unpackInt2x16"; + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::UShort && in_type.vecsize == 2) + return "packUint2x16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + return "unpackUint2x16"; + else if (out_type.basetype == SPIRType::Int64 && in_type.basetype == SPIRType::Short && in_type.vecsize == 4) + return "packInt4x16"; + else if (out_type.basetype == SPIRType::Short && in_type.basetype == SPIRType::Int64 && in_type.vecsize == 1) + return "unpackInt4x16"; + else if (out_type.basetype == SPIRType::UInt64 && in_type.basetype == SPIRType::UShort && in_type.vecsize == 4) + return "packUint4x16"; + else if (out_type.basetype == SPIRType::UShort && in_type.basetype == SPIRType::UInt64 && in_type.vecsize == 1) + return "unpackUint4x16"; + + return ""; +} + +string CompilerGLSL::bitcast_glsl(const SPIRType &result_type, uint32_t argument) +{ + auto op = bitcast_glsl_op(result_type, expression_type(argument)); + if (op.empty()) + return to_enclosed_unpacked_expression(argument); + else + return join(op, "(", to_unpacked_expression(argument), ")"); +} + +std::string CompilerGLSL::bitcast_expression(SPIRType::BaseType target_type, uint32_t arg) +{ + auto expr = to_expression(arg); + auto &src_type = expression_type(arg); + if (src_type.basetype != target_type) + { + auto target = src_type; + target.basetype = target_type; + expr = join(bitcast_glsl_op(target, src_type), "(", expr, ")"); + } + + return expr; +} + +std::string CompilerGLSL::bitcast_expression(const SPIRType &target_type, SPIRType::BaseType expr_type, + const std::string &expr) +{ + if (target_type.basetype == expr_type) + return expr; + + auto src_type = target_type; + src_type.basetype = expr_type; + return join(bitcast_glsl_op(target_type, src_type), "(", expr, ")"); +} + +string CompilerGLSL::builtin_to_glsl(BuiltIn builtin, StorageClass storage) +{ + switch (builtin) + { + case BuiltInPosition: + return "gl_Position"; + case BuiltInPointSize: + return "gl_PointSize"; + case BuiltInClipDistance: + { + if (options.es) + require_extension_internal("GL_EXT_clip_cull_distance"); + return "gl_ClipDistance"; + } + case BuiltInCullDistance: + { + if (options.es) + require_extension_internal("GL_EXT_clip_cull_distance"); + return "gl_CullDistance"; + } + case BuiltInVertexId: + if (options.vulkan_semantics) + SPIRV_CROSS_THROW("Cannot implement gl_VertexID in Vulkan GLSL. This shader was created " + "with GL semantics."); + return "gl_VertexID"; + case BuiltInInstanceId: + if (options.vulkan_semantics) + { + auto model = get_entry_point().model; + switch (model) + { + case spv::ExecutionModelIntersectionKHR: + case spv::ExecutionModelAnyHitKHR: + case spv::ExecutionModelClosestHitKHR: + // gl_InstanceID is allowed in these shaders. + break; + + default: + SPIRV_CROSS_THROW("Cannot implement gl_InstanceID in Vulkan GLSL. This shader was " + "created with GL semantics."); + } + } + if (!options.es && options.version < 140) + { + require_extension_internal("GL_ARB_draw_instanced"); + } + return "gl_InstanceID"; + case BuiltInVertexIndex: + if (options.vulkan_semantics) + return "gl_VertexIndex"; + else + return "gl_VertexID"; // gl_VertexID already has the base offset applied. + case BuiltInInstanceIndex: + if (options.vulkan_semantics) + return "gl_InstanceIndex"; + + if (!options.es && options.version < 140) + { + require_extension_internal("GL_ARB_draw_instanced"); + } + + if (options.vertex.support_nonzero_base_instance) + { + if (!options.vulkan_semantics) + { + // This is a soft-enable. We will opt-in to using gl_BaseInstanceARB if supported. + require_extension_internal("GL_ARB_shader_draw_parameters"); + } + return "(gl_InstanceID + SPIRV_Cross_BaseInstance)"; // ... but not gl_InstanceID. + } + else + return "gl_InstanceID"; + case BuiltInPrimitiveId: + if (storage == StorageClassInput && get_entry_point().model == ExecutionModelGeometry) + return "gl_PrimitiveIDIn"; + else + return "gl_PrimitiveID"; + case BuiltInInvocationId: + return "gl_InvocationID"; + case BuiltInLayer: + return "gl_Layer"; + case BuiltInViewportIndex: + return "gl_ViewportIndex"; + case BuiltInTessLevelOuter: + return "gl_TessLevelOuter"; + case BuiltInTessLevelInner: + return "gl_TessLevelInner"; + case BuiltInTessCoord: + return "gl_TessCoord"; + case BuiltInPatchVertices: + return "gl_PatchVerticesIn"; + case BuiltInFragCoord: + return "gl_FragCoord"; + case BuiltInPointCoord: + return "gl_PointCoord"; + case BuiltInFrontFacing: + return "gl_FrontFacing"; + case BuiltInFragDepth: + return "gl_FragDepth"; + case BuiltInNumWorkgroups: + return "gl_NumWorkGroups"; + case BuiltInWorkgroupSize: + return "gl_WorkGroupSize"; + case BuiltInWorkgroupId: + return "gl_WorkGroupID"; + case BuiltInLocalInvocationId: + return "gl_LocalInvocationID"; + case BuiltInGlobalInvocationId: + return "gl_GlobalInvocationID"; + case BuiltInLocalInvocationIndex: + return "gl_LocalInvocationIndex"; + case BuiltInHelperInvocation: + return "gl_HelperInvocation"; + + case BuiltInBaseVertex: + if (options.es) + SPIRV_CROSS_THROW("BaseVertex not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_BaseVertexARB"; + } + return "gl_BaseVertex"; + } + // On regular GL, this is soft-enabled and we emit ifdefs in code. + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "SPIRV_Cross_BaseVertex"; + + case BuiltInBaseInstance: + if (options.es) + SPIRV_CROSS_THROW("BaseInstance not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_BaseInstanceARB"; + } + return "gl_BaseInstance"; + } + // On regular GL, this is soft-enabled and we emit ifdefs in code. + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "SPIRV_Cross_BaseInstance"; + + case BuiltInDrawIndex: + if (options.es) + SPIRV_CROSS_THROW("DrawIndex not supported in ES profile."); + + if (options.vulkan_semantics) + { + if (options.version < 460) + { + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_DrawIDARB"; + } + return "gl_DrawID"; + } + // On regular GL, this is soft-enabled and we emit ifdefs in code. + require_extension_internal("GL_ARB_shader_draw_parameters"); + return "gl_DrawIDARB"; + + case BuiltInSampleId: + if (is_legacy()) + SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL."); + else if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + else if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_sample_shading"); + return "gl_SampleID"; + + case BuiltInSampleMask: + if (is_legacy()) + SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL."); + else if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + else if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_sample_shading"); + + if (storage == StorageClassInput) + return "gl_SampleMaskIn"; + else + return "gl_SampleMask"; + + case BuiltInSamplePosition: + if (is_legacy()) + SPIRV_CROSS_THROW("Sample variables not supported in legacy GLSL."); + else if (options.es && options.version < 320) + require_extension_internal("GL_OES_sample_variables"); + else if (!options.es && options.version < 400) + require_extension_internal("GL_ARB_sample_shading"); + return "gl_SamplePosition"; + + case BuiltInViewIndex: + if (options.vulkan_semantics) + return "gl_ViewIndex"; + else + return "gl_ViewID_OVR"; + + case BuiltInNumSubgroups: + request_subgroup_feature(ShaderSubgroupSupportHelper::NumSubgroups); + return "gl_NumSubgroups"; + + case BuiltInSubgroupId: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupID); + return "gl_SubgroupID"; + + case BuiltInSubgroupSize: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupSize); + return "gl_SubgroupSize"; + + case BuiltInSubgroupLocalInvocationId: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupInvocationID); + return "gl_SubgroupInvocationID"; + + case BuiltInSubgroupEqMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupEqMask"; + + case BuiltInSubgroupGeMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupGeMask"; + + case BuiltInSubgroupGtMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupGtMask"; + + case BuiltInSubgroupLeMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupLeMask"; + + case BuiltInSubgroupLtMask: + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMask); + return "gl_SubgroupLtMask"; + + case BuiltInLaunchIdKHR: + return ray_tracing_is_khr ? "gl_LaunchIDEXT" : "gl_LaunchIDNV"; + case BuiltInLaunchSizeKHR: + return ray_tracing_is_khr ? "gl_LaunchSizeEXT" : "gl_LaunchSizeNV"; + case BuiltInWorldRayOriginKHR: + return ray_tracing_is_khr ? "gl_WorldRayOriginEXT" : "gl_WorldRayOriginNV"; + case BuiltInWorldRayDirectionKHR: + return ray_tracing_is_khr ? "gl_WorldRayDirectionEXT" : "gl_WorldRayDirectionNV"; + case BuiltInObjectRayOriginKHR: + return ray_tracing_is_khr ? "gl_ObjectRayOriginEXT" : "gl_ObjectRayOriginNV"; + case BuiltInObjectRayDirectionKHR: + return ray_tracing_is_khr ? "gl_ObjectRayDirectionEXT" : "gl_ObjectRayDirectionNV"; + case BuiltInRayTminKHR: + return ray_tracing_is_khr ? "gl_RayTminEXT" : "gl_RayTminNV"; + case BuiltInRayTmaxKHR: + return ray_tracing_is_khr ? "gl_RayTmaxEXT" : "gl_RayTmaxNV"; + case BuiltInInstanceCustomIndexKHR: + return ray_tracing_is_khr ? "gl_InstanceCustomIndexEXT" : "gl_InstanceCustomIndexNV"; + case BuiltInObjectToWorldKHR: + return ray_tracing_is_khr ? "gl_ObjectToWorldEXT" : "gl_ObjectToWorldNV"; + case BuiltInWorldToObjectKHR: + return ray_tracing_is_khr ? "gl_WorldToObjectEXT" : "gl_WorldToObjectNV"; + case BuiltInHitTNV: + // gl_HitTEXT is an alias of RayTMax in KHR. + return "gl_HitTNV"; + case BuiltInHitKindKHR: + return ray_tracing_is_khr ? "gl_HitKindEXT" : "gl_HitKindNV"; + case BuiltInIncomingRayFlagsKHR: + return ray_tracing_is_khr ? "gl_IncomingRayFlagsEXT" : "gl_IncomingRayFlagsNV"; + + case BuiltInBaryCoordKHR: + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("gl_BaryCoordEXT requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("gl_BaryCoordEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNV"; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + return "gl_BaryCoordEXT"; + } + } + + case BuiltInBaryCoordNoPerspNV: + { + if (options.es && options.version < 320) + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires ESSL 320."); + else if (!options.es && options.version < 450) + SPIRV_CROSS_THROW("gl_BaryCoordNoPerspEXT requires GLSL 450."); + + if (barycentric_is_nv) + { + require_extension_internal("GL_NV_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspNV"; + } + else + { + require_extension_internal("GL_EXT_fragment_shader_barycentric"); + return "gl_BaryCoordNoPerspEXT"; + } + } + + case BuiltInFragStencilRefEXT: + { + if (!options.es) + { + require_extension_internal("GL_ARB_shader_stencil_export"); + return "gl_FragStencilRefARB"; + } + else + SPIRV_CROSS_THROW("Stencil export not supported in GLES."); + } + + case BuiltInPrimitiveShadingRateKHR: + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Can only use PrimitiveShadingRateKHR in Vulkan GLSL."); + require_extension_internal("GL_EXT_fragment_shading_rate"); + return "gl_PrimitiveShadingRateEXT"; + } + + case BuiltInShadingRateKHR: + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Can only use ShadingRateKHR in Vulkan GLSL."); + require_extension_internal("GL_EXT_fragment_shading_rate"); + return "gl_ShadingRateEXT"; + } + + case BuiltInDeviceIndex: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Need Vulkan semantics for device group support."); + require_extension_internal("GL_EXT_device_group"); + return "gl_DeviceIndex"; + + case BuiltInFullyCoveredEXT: + if (!options.es) + require_extension_internal("GL_NV_conservative_raster_underestimation"); + else + SPIRV_CROSS_THROW("Need desktop GL to use GL_NV_conservative_raster_underestimation."); + return "gl_FragFullyCoveredNV"; + + case BuiltInPrimitiveTriangleIndicesEXT: + return "gl_PrimitiveTriangleIndicesEXT"; + case BuiltInPrimitiveLineIndicesEXT: + return "gl_PrimitiveLineIndicesEXT"; + case BuiltInPrimitivePointIndicesEXT: + return "gl_PrimitivePointIndicesEXT"; + case BuiltInCullPrimitiveEXT: + return "gl_CullPrimitiveEXT"; + + default: + return join("gl_BuiltIn_", convert_to_string(builtin)); + } +} + +const char *CompilerGLSL::index_to_swizzle(uint32_t index) +{ + switch (index) + { + case 0: + return "x"; + case 1: + return "y"; + case 2: + return "z"; + case 3: + return "w"; + default: + return "x"; // Don't crash, but engage the "undefined behavior" described for out-of-bounds logical addressing in spec. + } +} + +void CompilerGLSL::access_chain_internal_append_index(std::string &expr, uint32_t /*base*/, const SPIRType * /*type*/, + AccessChainFlags flags, bool &access_chain_is_arrayed, + uint32_t index) +{ + bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; + bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; + bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + + string idx_expr = index_is_literal ? convert_to_string(index) : to_unpacked_expression(index, register_expression_read); + + // For the case where the base of an OpPtrAccessChain already ends in [n], + // we need to use the index as an offset to the existing index, otherwise, + // we can just use the index directly. + if (ptr_chain && access_chain_is_arrayed) + { + size_t split_pos = expr.find_last_of(']'); + size_t enclose_split = expr.find_last_of(')'); + + // If we have already enclosed the expression, don't try to be clever, it will break. + if (split_pos > enclose_split || enclose_split == string::npos) + { + string expr_front = expr.substr(0, split_pos); + string expr_back = expr.substr(split_pos); + expr = expr_front + " + " + enclose_expression(idx_expr) + expr_back; + return; + } + } + + expr += "["; + expr += idx_expr; + expr += "]"; +} + +bool CompilerGLSL::access_chain_needs_stage_io_builtin_translation(uint32_t) +{ + return true; +} + +string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, + AccessChainFlags flags, AccessChainMeta *meta) +{ + string expr; + + bool index_is_literal = (flags & ACCESS_CHAIN_INDEX_IS_LITERAL_BIT) != 0; + bool msb_is_id = (flags & ACCESS_CHAIN_LITERAL_MSB_FORCE_ID) != 0; + bool chain_only = (flags & ACCESS_CHAIN_CHAIN_ONLY_BIT) != 0; + bool ptr_chain = (flags & ACCESS_CHAIN_PTR_CHAIN_BIT) != 0; + bool register_expression_read = (flags & ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT) == 0; + bool flatten_member_reference = (flags & ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT) != 0; + + if (!chain_only) + { + // We handle transpose explicitly, so don't resolve that here. + auto *e = maybe_get<SPIRExpression>(base); + bool old_transpose = e && e->need_transpose; + if (e) + e->need_transpose = false; + expr = to_enclosed_expression(base, register_expression_read); + if (e) + e->need_transpose = old_transpose; + } + + // Start traversing type hierarchy at the proper non-pointer types, + // but keep type_id referencing the original pointer for use below. + uint32_t type_id = expression_type_id(base); + const auto *type = &get_pointee_type(type_id); + + if (!backend.native_pointers) + { + if (ptr_chain) + SPIRV_CROSS_THROW("Backend does not support native pointers and does not support OpPtrAccessChain."); + + // Wrapped buffer reference pointer types will need to poke into the internal "value" member before + // continuing the access chain. + if (should_dereference(base)) + expr = dereference_expression(get<SPIRType>(type_id), expr); + } + else if (should_dereference(base) && type->basetype != SPIRType::Struct && !ptr_chain) + expr = join("(", dereference_expression(*type, expr), ")"); + + bool access_chain_is_arrayed = expr.find_first_of('[') != string::npos; + bool row_major_matrix_needs_conversion = is_non_native_row_major_matrix(base); + bool is_packed = has_extended_decoration(base, SPIRVCrossDecorationPhysicalTypePacked); + uint32_t physical_type = get_extended_decoration(base, SPIRVCrossDecorationPhysicalTypeID); + bool is_invariant = has_decoration(base, DecorationInvariant); + bool relaxed_precision = has_decoration(base, DecorationRelaxedPrecision); + bool pending_array_enclose = false; + bool dimension_flatten = false; + bool access_meshlet_position_y = false; + + if (auto *base_expr = maybe_get<SPIRExpression>(base)) + { + access_meshlet_position_y = base_expr->access_meshlet_position_y; + } + + // If we are translating access to a structured buffer, the first subscript '._m0' must be hidden + bool hide_first_subscript = count > 1 && is_user_type_structured(base); + + const auto append_index = [&](uint32_t index, bool is_literal, bool is_ptr_chain = false) { + AccessChainFlags mod_flags = flags; + if (!is_literal) + mod_flags &= ~ACCESS_CHAIN_INDEX_IS_LITERAL_BIT; + if (!is_ptr_chain) + mod_flags &= ~ACCESS_CHAIN_PTR_CHAIN_BIT; + access_chain_internal_append_index(expr, base, type, mod_flags, access_chain_is_arrayed, index); + check_physical_type_cast(expr, type, physical_type); + }; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t index = indices[i]; + + bool is_literal = index_is_literal; + if (is_literal && msb_is_id && (index >> 31u) != 0u) + { + is_literal = false; + index &= 0x7fffffffu; + } + + bool ptr_chain_array_entry = ptr_chain && i == 0 && is_array(*type); + + if (ptr_chain_array_entry) + { + // This is highly unusual code, since normally we'd use plain AccessChain, but it's still allowed. + // We are considered to have a pointer to array and one element shifts by one array at a time. + // If we use normal array indexing, we'll first decay to pointer, and lose the array-ness, + // so we have to take pointer to array explicitly. + if (!should_dereference(base)) + expr = enclose_expression(address_of_expression(expr)); + } + + if (ptr_chain && i == 0) + { + // Pointer chains + // If we are flattening multidimensional arrays, only create opening bracket on first + // array index. + if (options.flatten_multidimensional_arrays) + { + dimension_flatten = type->array.size() >= 1; + pending_array_enclose = dimension_flatten; + if (pending_array_enclose) + expr += "["; + } + + if (options.flatten_multidimensional_arrays && dimension_flatten) + { + // If we are flattening multidimensional arrays, do manual stride computation. + if (is_literal) + expr += convert_to_string(index); + else + expr += to_enclosed_expression(index, register_expression_read); + + for (auto j = uint32_t(type->array.size()); j; j--) + { + expr += " * "; + expr += enclose_expression(to_array_size(*type, j - 1)); + } + + if (type->array.empty()) + pending_array_enclose = false; + else + expr += " + "; + + if (!pending_array_enclose) + expr += "]"; + } + else + { + if (flags & ACCESS_CHAIN_PTR_CHAIN_POINTER_ARITH_BIT) + { + SPIRType tmp_type(OpTypeInt); + tmp_type.basetype = SPIRType::UInt64; + tmp_type.width = 64; + tmp_type.vecsize = 1; + tmp_type.columns = 1; + + TypeID ptr_type_id = expression_type_id(base); + const SPIRType &ptr_type = get<SPIRType>(ptr_type_id); + const SPIRType &pointee_type = get_pointee_type(ptr_type); + + // This only runs in native pointer backends. + // Can replace reinterpret_cast with a backend string if ever needed. + // We expect this to count as a de-reference. + // This leaks some MSL details, but feels slightly overkill to + // add yet another virtual interface just for this. + auto intptr_expr = join("reinterpret_cast<", type_to_glsl(tmp_type), ">(", expr, ")"); + intptr_expr += join(" + ", to_enclosed_unpacked_expression(index), " * ", + get_decoration(ptr_type_id, DecorationArrayStride)); + + if (flags & ACCESS_CHAIN_PTR_CHAIN_CAST_TO_SCALAR_BIT) + { + is_packed = true; + expr = join("*reinterpret_cast<device packed_", type_to_glsl(pointee_type), + " *>(", intptr_expr, ")"); + } + else + { + expr = join("*reinterpret_cast<", type_to_glsl(ptr_type), ">(", intptr_expr, ")"); + } + } + else + append_index(index, is_literal, true); + } + + if (type->basetype == SPIRType::ControlPointArray) + { + type_id = type->parent_type; + type = &get<SPIRType>(type_id); + } + + access_chain_is_arrayed = true; + + // Explicitly enclose the expression if this is one of the weird pointer-to-array cases. + // We don't want any future indexing to add to this array dereference. + // Enclosing the expression blocks that and avoids any shenanigans with operand priority. + if (ptr_chain_array_entry) + expr = join("(", expr, ")"); + } + // Arrays + else if (!type->array.empty()) + { + // If we are flattening multidimensional arrays, only create opening bracket on first + // array index. + if (options.flatten_multidimensional_arrays && !pending_array_enclose) + { + dimension_flatten = type->array.size() > 1; + pending_array_enclose = dimension_flatten; + if (pending_array_enclose) + expr += "["; + } + + assert(type->parent_type); + + auto *var = maybe_get<SPIRVariable>(base); + if (backend.force_gl_in_out_block && i == 0 && var && is_builtin_variable(*var) && + !has_decoration(type->self, DecorationBlock)) + { + // This deals with scenarios for tesc/geom where arrays of gl_Position[] are declared. + // Normally, these variables live in blocks when compiled from GLSL, + // but HLSL seems to just emit straight arrays here. + // We must pretend this access goes through gl_in/gl_out arrays + // to be able to access certain builtins as arrays. + // Similar concerns apply for mesh shaders where we have to redirect to gl_MeshVerticesEXT or MeshPrimitivesEXT. + auto builtin = ir.meta[base].decoration.builtin_type; + bool mesh_shader = get_execution_model() == ExecutionModelMeshEXT; + + switch (builtin) + { + case BuiltInCullDistance: + case BuiltInClipDistance: + if (type->array.size() == 1) // Red herring. Only consider block IO for two-dimensional arrays here. + { + append_index(index, is_literal); + break; + } + // fallthrough + case BuiltInPosition: + case BuiltInPointSize: + if (mesh_shader) + expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); + else if (var->storage == StorageClassInput) + expr = join("gl_in[", to_expression(index, register_expression_read), "].", expr); + else if (var->storage == StorageClassOutput) + expr = join("gl_out[", to_expression(index, register_expression_read), "].", expr); + else + append_index(index, is_literal); + break; + + case BuiltInPrimitiveId: + case BuiltInLayer: + case BuiltInViewportIndex: + case BuiltInCullPrimitiveEXT: + case BuiltInPrimitiveShadingRateKHR: + if (mesh_shader) + expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); + else + append_index(index, is_literal); + break; + + default: + append_index(index, is_literal); + break; + } + } + else if (backend.force_merged_mesh_block && i == 0 && var && + !is_builtin_variable(*var) && var->storage == StorageClassOutput) + { + if (is_per_primitive_variable(*var)) + expr = join("gl_MeshPrimitivesEXT[", to_expression(index, register_expression_read), "].", expr); + else + expr = join("gl_MeshVerticesEXT[", to_expression(index, register_expression_read), "].", expr); + } + else if (options.flatten_multidimensional_arrays && dimension_flatten) + { + // If we are flattening multidimensional arrays, do manual stride computation. + auto &parent_type = get<SPIRType>(type->parent_type); + + if (is_literal) + expr += convert_to_string(index); + else + expr += to_enclosed_expression(index, register_expression_read); + + for (auto j = uint32_t(parent_type.array.size()); j; j--) + { + expr += " * "; + expr += enclose_expression(to_array_size(parent_type, j - 1)); + } + + if (parent_type.array.empty()) + pending_array_enclose = false; + else + expr += " + "; + + if (!pending_array_enclose) + expr += "]"; + } + else if (index_is_literal || !builtin_translates_to_nonarray(BuiltIn(get_decoration(base, DecorationBuiltIn)))) + { + // Some builtins are arrays in SPIR-V but not in other languages, e.g. gl_SampleMask[] is an array in SPIR-V but not in Metal. + // By throwing away the index, we imply the index was 0, which it must be for gl_SampleMask. + // For literal indices we are working on composites, so we ignore this since we have already converted to proper array. + append_index(index, is_literal); + } + + if (var && has_decoration(var->self, DecorationBuiltIn) && + get_decoration(var->self, DecorationBuiltIn) == BuiltInPosition && + get_execution_model() == ExecutionModelMeshEXT) + { + access_meshlet_position_y = true; + } + + type_id = type->parent_type; + type = &get<SPIRType>(type_id); + + // If the physical type has an unnatural vecsize, + // we must assume it's a faked struct where the .data member + // is used for the real payload. + if (physical_type && (is_vector(*type) || is_scalar(*type))) + { + auto &phys = get<SPIRType>(physical_type); + if (phys.vecsize > 4) + expr += ".data"; + } + + access_chain_is_arrayed = true; + } + // For structs, the index refers to a constant, which indexes into the members, possibly through a redirection mapping. + // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. + else if (type->basetype == SPIRType::Struct) + { + if (!is_literal) + index = evaluate_constant_u32(index); + + if (index < uint32_t(type->member_type_index_redirection.size())) + index = type->member_type_index_redirection[index]; + + if (index >= type->member_types.size()) + SPIRV_CROSS_THROW("Member index is out of bounds!"); + + if (hide_first_subscript) + { + // First "._m0" subscript has been hidden, subsequent fields must be emitted even for structured buffers + hide_first_subscript = false; + } + else + { + BuiltIn builtin = BuiltInMax; + if (is_member_builtin(*type, index, &builtin) && access_chain_needs_stage_io_builtin_translation(base)) + { + if (access_chain_is_arrayed) + { + expr += "."; + expr += builtin_to_glsl(builtin, type->storage); + } + else + expr = builtin_to_glsl(builtin, type->storage); + + if (builtin == BuiltInPosition && get_execution_model() == ExecutionModelMeshEXT) + { + access_meshlet_position_y = true; + } + } + else + { + // If the member has a qualified name, use it as the entire chain + string qual_mbr_name = get_member_qualified_name(type_id, index); + if (!qual_mbr_name.empty()) + expr = qual_mbr_name; + else if (flatten_member_reference) + expr += join("_", to_member_name(*type, index)); + else + { + // Any pointer de-refences for values are handled in the first access chain. + // For pointer chains, the pointer-ness is resolved through an array access. + // The only time this is not true is when accessing array of SSBO/UBO. + // This case is explicitly handled. + expr += to_member_reference(base, *type, index, ptr_chain || i != 0); + } + } + } + + if (has_member_decoration(type->self, index, DecorationInvariant)) + is_invariant = true; + if (has_member_decoration(type->self, index, DecorationRelaxedPrecision)) + relaxed_precision = true; + + is_packed = member_is_packed_physical_type(*type, index); + if (member_is_remapped_physical_type(*type, index)) + physical_type = get_extended_member_decoration(type->self, index, SPIRVCrossDecorationPhysicalTypeID); + else + physical_type = 0; + + row_major_matrix_needs_conversion = member_is_non_native_row_major_matrix(*type, index); + type = &get<SPIRType>(type->member_types[index]); + } + // Matrix -> Vector + else if (type->columns > 1) + { + // If we have a row-major matrix here, we need to defer any transpose in case this access chain + // is used to store a column. We can resolve it right here and now if we access a scalar directly, + // by flipping indexing order of the matrix. + + expr += "["; + if (is_literal) + expr += convert_to_string(index); + else + expr += to_unpacked_expression(index, register_expression_read); + expr += "]"; + + // If the physical type has an unnatural vecsize, + // we must assume it's a faked struct where the .data member + // is used for the real payload. + if (physical_type) + { + auto &phys = get<SPIRType>(physical_type); + if (phys.vecsize > 4 || phys.columns > 4) + expr += ".data"; + } + + type_id = type->parent_type; + type = &get<SPIRType>(type_id); + } + // Vector -> Scalar + else if (type->vecsize > 1) + { + string deferred_index; + if (row_major_matrix_needs_conversion) + { + // Flip indexing order. + auto column_index = expr.find_last_of('['); + if (column_index != string::npos) + { + deferred_index = expr.substr(column_index); + + auto end_deferred_index = deferred_index.find_last_of(']'); + if (end_deferred_index != string::npos && end_deferred_index + 1 != deferred_index.size()) + { + // If we have any data member fixups, it must be transposed so that it refers to this index. + // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong, + // and needs to be [1].data[0] instead. + end_deferred_index++; + deferred_index = deferred_index.substr(end_deferred_index) + + deferred_index.substr(0, end_deferred_index); + } + + expr.resize(column_index); + } + } + + // Internally, access chain implementation can also be used on composites, + // ignore scalar access workarounds in this case. + StorageClass effective_storage = StorageClassGeneric; + bool ignore_potential_sliced_writes = false; + if ((flags & ACCESS_CHAIN_FORCE_COMPOSITE_BIT) == 0) + { + if (expression_type(base).pointer) + effective_storage = get_expression_effective_storage_class(base); + + // Special consideration for control points. + // Control points can only be written by InvocationID, so there is no need + // to consider scalar access chains here. + // Cleans up some cases where it's very painful to determine the accurate storage class + // since blocks can be partially masked ... + auto *var = maybe_get_backing_variable(base); + if (var && var->storage == StorageClassOutput && + get_execution_model() == ExecutionModelTessellationControl && + !has_decoration(var->self, DecorationPatch)) + { + ignore_potential_sliced_writes = true; + } + } + else + ignore_potential_sliced_writes = true; + + if (!row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) + { + // On some backends, we might not be able to safely access individual scalars in a vector. + // To work around this, we might have to cast the access chain reference to something which can, + // like a pointer to scalar, which we can then index into. + prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage, + is_packed); + } + + if (is_literal) + { + bool out_of_bounds = (index >= type->vecsize); + + if (!is_packed && !row_major_matrix_needs_conversion) + { + expr += "."; + expr += index_to_swizzle(out_of_bounds ? 0 : index); + } + else + { + // For packed vectors, we can only access them as an array, not by swizzle. + expr += join("[", out_of_bounds ? 0 : index, "]"); + } + } + else if (ir.ids[index].get_type() == TypeConstant && !is_packed && !row_major_matrix_needs_conversion) + { + auto &c = get<SPIRConstant>(index); + bool out_of_bounds = (c.scalar() >= type->vecsize); + + if (c.specialization) + { + // If the index is a spec constant, we cannot turn extract into a swizzle. + expr += join("[", out_of_bounds ? "0" : to_expression(index), "]"); + } + else + { + expr += "."; + expr += index_to_swizzle(out_of_bounds ? 0 : c.scalar()); + } + } + else + { + expr += "["; + expr += to_unpacked_expression(index, register_expression_read); + expr += "]"; + } + + if (row_major_matrix_needs_conversion && !ignore_potential_sliced_writes) + { + if (prepare_access_chain_for_scalar_access(expr, get<SPIRType>(type->parent_type), effective_storage, + is_packed)) + { + // We're in a pointer context now, so just remove any member dereference. + auto first_index = deferred_index.find_first_of('['); + if (first_index != string::npos && first_index != 0) + deferred_index = deferred_index.substr(first_index); + } + } + + if (access_meshlet_position_y) + { + if (is_literal) + { + access_meshlet_position_y = index == 1; + } + else + { + const auto *c = maybe_get<SPIRConstant>(index); + if (c) + access_meshlet_position_y = c->scalar() == 1; + else + { + // We don't know, but we have to assume no. + // Flip Y in mesh shaders is an opt-in horrible hack, so we'll have to assume shaders try to behave. + access_meshlet_position_y = false; + } + } + } + + expr += deferred_index; + row_major_matrix_needs_conversion = false; + + is_packed = false; + physical_type = 0; + type_id = type->parent_type; + type = &get<SPIRType>(type_id); + } + else if (!backend.allow_truncated_access_chain) + SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + } + + if (pending_array_enclose) + { + SPIRV_CROSS_THROW("Flattening of multidimensional arrays were enabled, " + "but the access chain was terminated in the middle of a multidimensional array. " + "This is not supported."); + } + + if (meta) + { + meta->need_transpose = row_major_matrix_needs_conversion; + meta->storage_is_packed = is_packed; + meta->storage_is_invariant = is_invariant; + meta->storage_physical_type = physical_type; + meta->relaxed_precision = relaxed_precision; + meta->access_meshlet_position_y = access_meshlet_position_y; + } + + return expr; +} + +void CompilerGLSL::check_physical_type_cast(std::string &, const SPIRType *, uint32_t) +{ +} + +bool CompilerGLSL::prepare_access_chain_for_scalar_access(std::string &, const SPIRType &, spv::StorageClass, bool &) +{ + return false; +} + +string CompilerGLSL::to_flattened_struct_member(const string &basename, const SPIRType &type, uint32_t index) +{ + auto ret = join(basename, "_", to_member_name(type, index)); + ParsedIR::sanitize_underscores(ret); + return ret; +} + +uint32_t CompilerGLSL::get_physical_type_stride(const SPIRType &) const +{ + SPIRV_CROSS_THROW("Invalid to call get_physical_type_stride on a backend without native pointer support."); +} + +string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type, + AccessChainMeta *meta, bool ptr_chain) +{ + if (flattened_buffer_blocks.count(base)) + { + uint32_t matrix_stride = 0; + uint32_t array_stride = 0; + bool need_transpose = false; + flattened_access_chain_offset(expression_type(base), indices, count, 0, 16, &need_transpose, &matrix_stride, + &array_stride, ptr_chain); + + if (meta) + { + meta->need_transpose = target_type.columns > 1 && need_transpose; + meta->storage_is_packed = false; + } + + return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, array_stride, + need_transpose); + } + else if (flattened_structs.count(base) && count > 0) + { + AccessChainFlags flags = ACCESS_CHAIN_CHAIN_ONLY_BIT | ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; + if (ptr_chain) + flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; + + if (flattened_structs[base]) + { + flags |= ACCESS_CHAIN_FLATTEN_ALL_MEMBERS_BIT; + if (meta) + meta->flattened_struct = target_type.basetype == SPIRType::Struct; + } + + auto chain = access_chain_internal(base, indices, count, flags, nullptr).substr(1); + if (meta) + { + meta->need_transpose = false; + meta->storage_is_packed = false; + } + + auto basename = to_flattened_access_chain_expression(base); + auto ret = join(basename, "_", chain); + ParsedIR::sanitize_underscores(ret); + return ret; + } + else + { + AccessChainFlags flags = ACCESS_CHAIN_SKIP_REGISTER_EXPRESSION_READ_BIT; + if (ptr_chain) + { + flags |= ACCESS_CHAIN_PTR_CHAIN_BIT; + // PtrAccessChain could get complicated. + TypeID type_id = expression_type_id(base); + if (backend.native_pointers && has_decoration(type_id, DecorationArrayStride)) + { + // If there is a mismatch we have to go via 64-bit pointer arithmetic :'( + // Using packed hacks only gets us so far, and is not designed to deal with pointer to + // random values. It works for structs though. + auto &pointee_type = get_pointee_type(get<SPIRType>(type_id)); + uint32_t physical_stride = get_physical_type_stride(pointee_type); + uint32_t requested_stride = get_decoration(type_id, DecorationArrayStride); + if (physical_stride != requested_stride) + { + flags |= ACCESS_CHAIN_PTR_CHAIN_POINTER_ARITH_BIT; + if (is_vector(pointee_type)) + flags |= ACCESS_CHAIN_PTR_CHAIN_CAST_TO_SCALAR_BIT; + } + } + } + + return access_chain_internal(base, indices, count, flags, meta); + } +} + +string CompilerGLSL::load_flattened_struct(const string &basename, const SPIRType &type) +{ + auto expr = type_to_glsl_constructor(type); + expr += '('; + + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + if (i) + expr += ", "; + + auto &member_type = get<SPIRType>(type.member_types[i]); + if (member_type.basetype == SPIRType::Struct) + expr += load_flattened_struct(to_flattened_struct_member(basename, type, i), member_type); + else + expr += to_flattened_struct_member(basename, type, i); + } + expr += ')'; + return expr; +} + +std::string CompilerGLSL::to_flattened_access_chain_expression(uint32_t id) +{ + // Do not use to_expression as that will unflatten access chains. + string basename; + if (const auto *var = maybe_get<SPIRVariable>(id)) + basename = to_name(var->self); + else if (const auto *expr = maybe_get<SPIRExpression>(id)) + basename = expr->expression; + else + basename = to_expression(id); + + return basename; +} + +void CompilerGLSL::store_flattened_struct(const string &basename, uint32_t rhs_id, const SPIRType &type, + const SmallVector<uint32_t> &indices) +{ + SmallVector<uint32_t> sub_indices = indices; + sub_indices.push_back(0); + + auto *member_type = &type; + for (auto &index : indices) + member_type = &get<SPIRType>(member_type->member_types[index]); + + for (uint32_t i = 0; i < uint32_t(member_type->member_types.size()); i++) + { + sub_indices.back() = i; + auto lhs = join(basename, "_", to_member_name(*member_type, i)); + ParsedIR::sanitize_underscores(lhs); + + if (get<SPIRType>(member_type->member_types[i]).basetype == SPIRType::Struct) + { + store_flattened_struct(lhs, rhs_id, type, sub_indices); + } + else + { + auto rhs = to_expression(rhs_id) + to_multi_member_reference(type, sub_indices); + statement(lhs, " = ", rhs, ";"); + } + } +} + +void CompilerGLSL::store_flattened_struct(uint32_t lhs_id, uint32_t value) +{ + auto &type = expression_type(lhs_id); + auto basename = to_flattened_access_chain_expression(lhs_id); + store_flattened_struct(basename, value, type, {}); +} + +std::string CompilerGLSL::flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride, + uint32_t /* array_stride */, bool need_transpose) +{ + if (!target_type.array.empty()) + SPIRV_CROSS_THROW("Access chains that result in an array can not be flattened"); + else if (target_type.basetype == SPIRType::Struct) + return flattened_access_chain_struct(base, indices, count, target_type, offset); + else if (target_type.columns > 1) + return flattened_access_chain_matrix(base, indices, count, target_type, offset, matrix_stride, need_transpose); + else + return flattened_access_chain_vector(base, indices, count, target_type, offset, matrix_stride, need_transpose); +} + +std::string CompilerGLSL::flattened_access_chain_struct(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset) +{ + std::string expr; + + if (backend.can_declare_struct_inline) + { + expr += type_to_glsl_constructor(target_type); + expr += "("; + } + else + expr += "{"; + + for (uint32_t i = 0; i < uint32_t(target_type.member_types.size()); ++i) + { + if (i != 0) + expr += ", "; + + const SPIRType &member_type = get<SPIRType>(target_type.member_types[i]); + uint32_t member_offset = type_struct_member_offset(target_type, i); + + // The access chain terminates at the struct, so we need to find matrix strides and row-major information + // ahead of time. + bool need_transpose = false; + bool relaxed = false; + uint32_t matrix_stride = 0; + if (member_type.columns > 1) + { + auto decorations = combined_decoration_for_member(target_type, i); + need_transpose = decorations.get(DecorationRowMajor); + relaxed = decorations.get(DecorationRelaxedPrecision); + matrix_stride = type_struct_member_matrix_stride(target_type, i); + } + + auto tmp = flattened_access_chain(base, indices, count, member_type, offset + member_offset, matrix_stride, + 0 /* array_stride */, need_transpose); + + // Cannot forward transpositions, so resolve them here. + if (need_transpose) + expr += convert_row_major_matrix(tmp, member_type, 0, false, relaxed); + else + expr += tmp; + } + + expr += backend.can_declare_struct_inline ? ")" : "}"; + + return expr; +} + +std::string CompilerGLSL::flattened_access_chain_matrix(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, + uint32_t matrix_stride, bool need_transpose) +{ + assert(matrix_stride); + SPIRType tmp_type = target_type; + if (need_transpose) + swap(tmp_type.vecsize, tmp_type.columns); + + std::string expr; + + expr += type_to_glsl_constructor(tmp_type); + expr += "("; + + for (uint32_t i = 0; i < tmp_type.columns; i++) + { + if (i != 0) + expr += ", "; + + expr += flattened_access_chain_vector(base, indices, count, tmp_type, offset + i * matrix_stride, matrix_stride, + /* need_transpose= */ false); + } + + expr += ")"; + + return expr; +} + +std::string CompilerGLSL::flattened_access_chain_vector(uint32_t base, const uint32_t *indices, uint32_t count, + const SPIRType &target_type, uint32_t offset, + uint32_t matrix_stride, bool need_transpose) +{ + auto result = flattened_access_chain_offset(expression_type(base), indices, count, offset, 16); + + auto buffer_name = to_name(expression_type(base).self); + + if (need_transpose) + { + std::string expr; + + if (target_type.vecsize > 1) + { + expr += type_to_glsl_constructor(target_type); + expr += "("; + } + + for (uint32_t i = 0; i < target_type.vecsize; ++i) + { + if (i != 0) + expr += ", "; + + uint32_t component_offset = result.second + i * matrix_stride; + + assert(component_offset % (target_type.width / 8) == 0); + uint32_t index = component_offset / (target_type.width / 8); + + expr += buffer_name; + expr += "["; + expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + + expr += convert_to_string(index / 4); + expr += "]"; + + expr += vector_swizzle(1, index % 4); + } + + if (target_type.vecsize > 1) + { + expr += ")"; + } + + return expr; + } + else + { + assert(result.second % (target_type.width / 8) == 0); + uint32_t index = result.second / (target_type.width / 8); + + std::string expr; + + expr += buffer_name; + expr += "["; + expr += result.first; // this is a series of N1 * k1 + N2 * k2 + ... that is either empty or ends with a + + expr += convert_to_string(index / 4); + expr += "]"; + + expr += vector_swizzle(target_type.vecsize, index % 4); + + return expr; + } +} + +std::pair<std::string, uint32_t> CompilerGLSL::flattened_access_chain_offset( + const SPIRType &basetype, const uint32_t *indices, uint32_t count, uint32_t offset, uint32_t word_stride, + bool *need_transpose, uint32_t *out_matrix_stride, uint32_t *out_array_stride, bool ptr_chain) +{ + // Start traversing type hierarchy at the proper non-pointer types. + const auto *type = &get_pointee_type(basetype); + + std::string expr; + + // Inherit matrix information in case we are access chaining a vector which might have come from a row major layout. + bool row_major_matrix_needs_conversion = need_transpose ? *need_transpose : false; + uint32_t matrix_stride = out_matrix_stride ? *out_matrix_stride : 0; + uint32_t array_stride = out_array_stride ? *out_array_stride : 0; + + for (uint32_t i = 0; i < count; i++) + { + uint32_t index = indices[i]; + + // Pointers + if (ptr_chain && i == 0) + { + // Here, the pointer type will be decorated with an array stride. + array_stride = get_decoration(basetype.self, DecorationArrayStride); + if (!array_stride) + SPIRV_CROSS_THROW("SPIR-V does not define ArrayStride for buffer block."); + + auto *constant = maybe_get<SPIRConstant>(index); + if (constant) + { + // Constant array access. + offset += constant->scalar() * array_stride; + } + else + { + // Dynamic array access. + if (array_stride % word_stride) + { + SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " + "of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push " + "constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index); + expr += " * "; + expr += convert_to_string(array_stride / word_stride); + expr += " + "; + } + } + // Arrays + else if (!type->array.empty()) + { + auto *constant = maybe_get<SPIRConstant>(index); + if (constant) + { + // Constant array access. + offset += constant->scalar() * array_stride; + } + else + { + // Dynamic array access. + if (array_stride % word_stride) + { + SPIRV_CROSS_THROW("Array stride for dynamic indexing must be divisible by the size " + "of a 4-component vector. " + "Likely culprit here is a float or vec2 array inside a push " + "constant block which is std430. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(array_stride / word_stride); + expr += " + "; + } + + uint32_t parent_type = type->parent_type; + type = &get<SPIRType>(parent_type); + + if (!type->array.empty()) + array_stride = get_decoration(parent_type, DecorationArrayStride); + } + // For structs, the index refers to a constant, which indexes into the members. + // We also check if this member is a builtin, since we then replace the entire expression with the builtin one. + else if (type->basetype == SPIRType::Struct) + { + index = evaluate_constant_u32(index); + + if (index >= type->member_types.size()) + SPIRV_CROSS_THROW("Member index is out of bounds!"); + + offset += type_struct_member_offset(*type, index); + + auto &struct_type = *type; + type = &get<SPIRType>(type->member_types[index]); + + if (type->columns > 1) + { + matrix_stride = type_struct_member_matrix_stride(struct_type, index); + row_major_matrix_needs_conversion = + combined_decoration_for_member(struct_type, index).get(DecorationRowMajor); + } + else + row_major_matrix_needs_conversion = false; + + if (!type->array.empty()) + array_stride = type_struct_member_array_stride(struct_type, index); + } + // Matrix -> Vector + else if (type->columns > 1) + { + auto *constant = maybe_get<SPIRConstant>(index); + if (constant) + { + index = evaluate_constant_u32(index); + offset += index * (row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? (type->width / 8) : matrix_stride; + // Dynamic array access. + if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW("Matrix stride for dynamic indexing must be divisible by the size of a " + "4-component vector. " + "Likely culprit here is a row-major matrix being accessed dynamically. " + "This cannot be flattened. Try using std140 layout instead."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } + + type = &get<SPIRType>(type->parent_type); + } + // Vector -> Scalar + else if (type->vecsize > 1) + { + auto *constant = maybe_get<SPIRConstant>(index); + if (constant) + { + index = evaluate_constant_u32(index); + offset += index * (row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8)); + } + else + { + uint32_t indexing_stride = row_major_matrix_needs_conversion ? matrix_stride : (type->width / 8); + + // Dynamic array access. + if (indexing_stride % word_stride) + { + SPIRV_CROSS_THROW("Stride for dynamic vector indexing must be divisible by the " + "size of a 4-component vector. " + "This cannot be flattened in legacy targets."); + } + + expr += to_enclosed_expression(index, false); + expr += " * "; + expr += convert_to_string(indexing_stride / word_stride); + expr += " + "; + } + + type = &get<SPIRType>(type->parent_type); + } + else + SPIRV_CROSS_THROW("Cannot subdivide a scalar value!"); + } + + if (need_transpose) + *need_transpose = row_major_matrix_needs_conversion; + if (out_matrix_stride) + *out_matrix_stride = matrix_stride; + if (out_array_stride) + *out_array_stride = array_stride; + + return std::make_pair(expr, offset); +} + +bool CompilerGLSL::should_dereference(uint32_t id) +{ + const auto &type = expression_type(id); + // Non-pointer expressions don't need to be dereferenced. + if (!type.pointer) + return false; + + // Handles shouldn't be dereferenced either. + if (!expression_is_lvalue(id)) + return false; + + // If id is a variable but not a phi variable, we should not dereference it. + if (auto *var = maybe_get<SPIRVariable>(id)) + return var->phi_variable; + + if (auto *expr = maybe_get<SPIRExpression>(id)) + { + // If id is an access chain, we should not dereference it. + if (expr->access_chain) + return false; + + // If id is a forwarded copy of a variable pointer, we should not dereference it. + SPIRVariable *var = nullptr; + while (expr->loaded_from && expression_is_forwarded(expr->self)) + { + auto &src_type = expression_type(expr->loaded_from); + // To be a copy, the pointer and its source expression must be the + // same type. Can't check type.self, because for some reason that's + // usually the base type with pointers stripped off. This check is + // complex enough that I've hoisted it out of the while condition. + if (src_type.pointer != type.pointer || src_type.pointer_depth != type.pointer_depth || + src_type.parent_type != type.parent_type) + break; + if ((var = maybe_get<SPIRVariable>(expr->loaded_from))) + break; + if (!(expr = maybe_get<SPIRExpression>(expr->loaded_from))) + break; + } + + return !var || var->phi_variable; + } + + // Otherwise, we should dereference this pointer expression. + return true; +} + +bool CompilerGLSL::should_forward(uint32_t id) const +{ + // If id is a variable we will try to forward it regardless of force_temporary check below + // This is important because otherwise we'll get local sampler copies (highp sampler2D foo = bar) that are invalid in OpenGL GLSL + + auto *var = maybe_get<SPIRVariable>(id); + if (var) + { + // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation. + return !(has_decoration(id, DecorationBuiltIn) && has_decoration(id, DecorationVolatile)); + } + + // For debugging emit temporary variables for all expressions + if (options.force_temporary) + return false; + + // If an expression carries enough dependencies we need to stop forwarding at some point, + // or we explode compilers. There are usually limits to how much we can nest expressions. + auto *expr = maybe_get<SPIRExpression>(id); + const uint32_t max_expression_dependencies = 64; + if (expr && expr->expression_dependencies.size() >= max_expression_dependencies) + return false; + + if (expr && expr->loaded_from + && has_decoration(expr->loaded_from, DecorationBuiltIn) + && has_decoration(expr->loaded_from, DecorationVolatile)) + { + // Never forward volatile builtin variables, e.g. SPIR-V 1.6 HelperInvocation. + return false; + } + + // Immutable expression can always be forwarded. + if (is_immutable(id)) + return true; + + return false; +} + +bool CompilerGLSL::should_suppress_usage_tracking(uint32_t id) const +{ + // Used only by opcodes which don't do any real "work", they just swizzle data in some fashion. + return !expression_is_forwarded(id) || expression_suppresses_usage_tracking(id); +} + +void CompilerGLSL::track_expression_read(uint32_t id) +{ + switch (ir.ids[id].get_type()) + { + case TypeExpression: + { + auto &e = get<SPIRExpression>(id); + for (auto implied_read : e.implied_read_expressions) + track_expression_read(implied_read); + break; + } + + case TypeAccessChain: + { + auto &e = get<SPIRAccessChain>(id); + for (auto implied_read : e.implied_read_expressions) + track_expression_read(implied_read); + break; + } + + default: + break; + } + + // If we try to read a forwarded temporary more than once we will stamp out possibly complex code twice. + // In this case, it's better to just bind the complex expression to the temporary and read that temporary twice. + if (expression_is_forwarded(id) && !expression_suppresses_usage_tracking(id)) + { + auto &v = expression_usage_counts[id]; + v++; + + // If we create an expression outside a loop, + // but access it inside a loop, we're implicitly reading it multiple times. + // If the expression in question is expensive, we should hoist it out to avoid relying on loop-invariant code motion + // working inside the backend compiler. + if (expression_read_implies_multiple_reads(id)) + v++; + + if (v >= 2) + { + //if (v == 2) + // fprintf(stderr, "ID %u was forced to temporary due to more than 1 expression use!\n", id); + + // Force a recompile after this pass to avoid forwarding this variable. + force_temporary_and_recompile(id); + } + } +} + +bool CompilerGLSL::args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure) +{ + if (forced_temporaries.find(id) != end(forced_temporaries)) + return false; + + for (uint32_t i = 0; i < num_args; i++) + if (!should_forward(args[i])) + return false; + + // We need to forward globals as well. + if (!pure) + { + for (auto global : global_variables) + if (!should_forward(global)) + return false; + for (auto aliased : aliased_variables) + if (!should_forward(aliased)) + return false; + } + + return true; +} + +void CompilerGLSL::register_impure_function_call() +{ + // Impure functions can modify globals and aliased variables, so invalidate them as well. + for (auto global : global_variables) + flush_dependees(get<SPIRVariable>(global)); + for (auto aliased : aliased_variables) + flush_dependees(get<SPIRVariable>(aliased)); +} + +void CompilerGLSL::register_call_out_argument(uint32_t id) +{ + register_write(id); + + auto *var = maybe_get<SPIRVariable>(id); + if (var) + flush_variable_declaration(var->self); +} + +string CompilerGLSL::variable_decl_function_local(SPIRVariable &var) +{ + // These variables are always function local, + // so make sure we emit the variable without storage qualifiers. + // Some backends will inject custom variables locally in a function + // with a storage qualifier which is not function-local. + auto old_storage = var.storage; + var.storage = StorageClassFunction; + auto expr = variable_decl(var); + var.storage = old_storage; + return expr; +} + +void CompilerGLSL::emit_variable_temporary_copies(const SPIRVariable &var) +{ + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + if (var.allocate_temporary_copy && !flushed_phi_variables.count(var.self)) + { + auto &type = get<SPIRType>(var.basetype); + auto &flags = get_decoration_bitset(var.self); + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, join("_", var.self, "_copy")), ";"); + flushed_phi_variables.insert(var.self); + } +} + +void CompilerGLSL::flush_variable_declaration(uint32_t id) +{ + // Ensure that we declare phi-variable copies even if the original declaration isn't deferred + auto *var = maybe_get<SPIRVariable>(id); + if (var && var->deferred_declaration) + { + string initializer; + if (options.force_zero_initialized_variables && + (var->storage == StorageClassFunction || var->storage == StorageClassGeneric || + var->storage == StorageClassPrivate) && + !var->initializer && type_can_zero_initialize(get_variable_data_type(*var))) + { + initializer = join(" = ", to_zero_initialized_expression(get_variable_data_type_id(*var))); + } + + statement(variable_decl_function_local(*var), initializer, ";"); + var->deferred_declaration = false; + } + if (var) + { + emit_variable_temporary_copies(*var); + } +} + +bool CompilerGLSL::remove_duplicate_swizzle(string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. + // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. + for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto prevpos = op.find_last_of('.', pos - 1); + if (prevpos == string::npos) + return false; + + prevpos++; + + // Make sure there are only swizzles here ... + for (auto i = prevpos; i < pos; i++) + { + if (op[i] < 'w' || op[i] > 'z') + { + // If swizzles are foo.xyz() like in C++ backend for example, check for that. + if (backend.swizzle_is_function && i + 2 == pos && op[i] == '(' && op[i + 1] == ')') + break; + return false; + } + } + + // If original swizzle is large enough, just carve out the components we need. + // E.g. foobar.wyx.xy will turn into foobar.wy. + if (pos - prevpos >= final_swiz.size()) + { + op.erase(prevpos + final_swiz.size(), string::npos); + + // Add back the function call ... + if (backend.swizzle_is_function) + op += "()"; + } + return true; +} + +// Optimizes away vector swizzles where we have something like +// vec3 foo; +// foo.xyz <-- swizzle expression does nothing. +// This is a very common pattern after OpCompositeCombine. +bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) +{ + auto pos = op.find_last_of('.'); + if (pos == string::npos || pos == 0) + return false; + + string final_swiz = op.substr(pos + 1, string::npos); + + if (backend.swizzle_is_function) + { + if (final_swiz.size() < 2) + return false; + + if (final_swiz.substr(final_swiz.size() - 2, string::npos) == "()") + final_swiz.erase(final_swiz.size() - 2, string::npos); + else + return false; + } + + // Check if final swizzle is of form .x, .xy, .xyz, .xyzw or similar. + // If so, and previous swizzle is of same length, + // we can drop the final swizzle altogether. + for (uint32_t i = 0; i < final_swiz.size(); i++) + { + static const char expected[] = { 'x', 'y', 'z', 'w' }; + if (i >= 4 || final_swiz[i] != expected[i]) + return false; + } + + auto &type = expression_type(base); + + // Sanity checking ... + assert(type.columns == 1 && type.array.empty()); + + if (type.vecsize == final_swiz.size()) + op.erase(pos, string::npos); + return true; +} + +string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) +{ + ID base = 0; + string op; + string subop; + + // Can only merge swizzles for vectors. + auto &type = get<SPIRType>(return_type); + bool can_apply_swizzle_opt = type.basetype != SPIRType::Struct && type.array.empty() && type.columns == 1; + bool swizzle_optimization = false; + + for (uint32_t i = 0; i < length; i++) + { + auto *e = maybe_get<SPIRExpression>(elems[i]); + + // If we're merging another scalar which belongs to the same base + // object, just merge the swizzles to avoid triggering more than 1 expression read as much as possible! + if (can_apply_swizzle_opt && e && e->base_expression && e->base_expression == base) + { + // Only supposed to be used for vector swizzle -> scalar. + assert(!e->expression.empty() && e->expression.front() == '.'); + subop += e->expression.substr(1, string::npos); + swizzle_optimization = true; + } + else + { + // We'll likely end up with duplicated swizzles, e.g. + // foobar.xyz.xyz from patterns like + // OpVectorShuffle + // OpCompositeExtract x 3 + // OpCompositeConstruct 3x + other scalar. + // Just modify op in-place. + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + // Don't attempt to remove unity swizzling if we managed to remove duplicate swizzles. + // The base "foo" might be vec4, while foo.xyz is vec3 (OpVectorShuffle) and looks like a vec3 due to the .xyz tacked on. + // We only want to remove the swizzles if we're certain that the resulting base will be the same vecsize. + // Essentially, we can only remove one set of swizzles, since that's what we have control over ... + // Case 1: + // foo.yxz.xyz: Duplicate swizzle kicks in, giving foo.yxz, we are done. + // foo.yxz was the result of OpVectorShuffle and we don't know the type of foo. + // Case 2: + // foo.xyz: Duplicate swizzle won't kick in. + // If foo is vec3, we can remove xyz, giving just foo. + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + swizzle_optimization = false; + op += subop; + } + else + op += subop; + + if (i) + op += ", "; + + bool uses_buffer_offset = + type.basetype == SPIRType::Struct && has_member_decoration(type.self, i, DecorationOffset); + subop = to_composite_constructor_expression(type, elems[i], uses_buffer_offset); + } + + base = e ? e->base_expression : ID(0); + } + + if (swizzle_optimization) + { + if (backend.swizzle_is_function) + subop += "()"; + + if (!remove_duplicate_swizzle(subop)) + remove_unity_swizzle(base, subop); + // Strips away redundant parens if we created them during component extraction. + strip_enclosed_expression(subop); + } + + op += subop; + return op; +} + +bool CompilerGLSL::skip_argument(uint32_t id) const +{ + if (!combined_image_samplers.empty() || !options.vulkan_semantics) + { + auto &type = expression_type(id); + if (type.basetype == SPIRType::Sampler || (type.basetype == SPIRType::Image && type.image.sampled == 1)) + return true; + } + return false; +} + +bool CompilerGLSL::optimize_read_modify_write(const SPIRType &type, const string &lhs, const string &rhs) +{ + // Do this with strings because we have a very clear pattern we can check for and it avoids + // adding lots of special cases to the code emission. + if (rhs.size() < lhs.size() + 3) + return false; + + // Do not optimize matrices. They are a bit awkward to reason about in general + // (in which order does operation happen?), and it does not work on MSL anyways. + if (type.vecsize > 1 && type.columns > 1) + return false; + + auto index = rhs.find(lhs); + if (index != 0) + return false; + + // TODO: Shift operators, but it's not important for now. + auto op = rhs.find_first_of("+-/*%|&^", lhs.size() + 1); + if (op != lhs.size() + 1) + return false; + + // Check that the op is followed by space. This excludes && and ||. + if (rhs[op + 1] != ' ') + return false; + + char bop = rhs[op]; + auto expr = rhs.substr(lhs.size() + 3); + + // Avoids false positives where we get a = a * b + c. + // Normally, these expressions are always enclosed, but unexpected code paths may end up hitting this. + if (needs_enclose_expression(expr)) + return false; + + // Try to find increments and decrements. Makes it look neater as += 1, -= 1 is fairly rare to see in real code. + // Find some common patterns which are equivalent. + if ((bop == '+' || bop == '-') && (expr == "1" || expr == "uint(1)" || expr == "1u" || expr == "int(1u)")) + statement(lhs, bop, bop, ";"); + else + statement(lhs, " ", bop, "= ", expr, ";"); + return true; +} + +void CompilerGLSL::register_control_dependent_expression(uint32_t expr) +{ + if (forwarded_temporaries.find(expr) == end(forwarded_temporaries)) + return; + + assert(current_emitting_block); + current_emitting_block->invalidate_expressions.push_back(expr); +} + +void CompilerGLSL::emit_block_instructions(SPIRBlock &block) +{ + current_emitting_block = █ + + if (backend.requires_relaxed_precision_analysis) + { + // If PHI variables are consumed in unexpected precision contexts, copy them here. + for (size_t i = 0, n = block.phi_variables.size(); i < n; i++) + { + auto &phi = block.phi_variables[i]; + + // Ensure we only copy once. We know a-priori that this array will lay out + // the same function variables together. + if (i && block.phi_variables[i - 1].function_variable == phi.function_variable) + continue; + + auto itr = temporary_to_mirror_precision_alias.find(phi.function_variable); + if (itr != temporary_to_mirror_precision_alias.end()) + { + // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject, + // so it helps to have handle_instruction_precision() on the outside of emit_instruction(). + EmbeddedInstruction inst; + inst.op = OpCopyObject; + inst.length = 3; + inst.ops.push_back(expression_type_id(itr->first)); + inst.ops.push_back(itr->second); + inst.ops.push_back(itr->first); + emit_instruction(inst); + } + } + } + + for (auto &op : block.ops) + { + auto temporary_copy = handle_instruction_precision(op); + emit_instruction(op); + if (temporary_copy.dst_id) + { + // Explicitly, we don't want to inherit RelaxedPrecision state in this CopyObject, + // so it helps to have handle_instruction_precision() on the outside of emit_instruction(). + EmbeddedInstruction inst; + inst.op = OpCopyObject; + inst.length = 3; + inst.ops.push_back(expression_type_id(temporary_copy.src_id)); + inst.ops.push_back(temporary_copy.dst_id); + inst.ops.push_back(temporary_copy.src_id); + + // Never attempt to hoist mirrored temporaries. + // They are hoisted in lock-step with their parents. + block_temporary_hoisting = true; + emit_instruction(inst); + block_temporary_hoisting = false; + } + } + + current_emitting_block = nullptr; +} + +void CompilerGLSL::disallow_forwarding_in_expression_chain(const SPIRExpression &expr) +{ + // Allow trivially forwarded expressions like OpLoad or trivial shuffles, + // these will be marked as having suppressed usage tracking. + // Our only concern is to make sure arithmetic operations are done in similar ways. + if (expression_is_forwarded(expr.self) && !expression_suppresses_usage_tracking(expr.self) && + forced_invariant_temporaries.count(expr.self) == 0) + { + force_temporary_and_recompile(expr.self); + forced_invariant_temporaries.insert(expr.self); + + for (auto &dependent : expr.expression_dependencies) + disallow_forwarding_in_expression_chain(get<SPIRExpression>(dependent)); + } +} + +void CompilerGLSL::handle_store_to_invariant_variable(uint32_t store_id, uint32_t value_id) +{ + // Variables or access chains marked invariant are complicated. We will need to make sure the code-gen leading up to + // this variable is consistent. The failure case for SPIRV-Cross is when an expression is forced to a temporary + // in one translation unit, but not another, e.g. due to multiple use of an expression. + // This causes variance despite the output variable being marked invariant, so the solution here is to force all dependent + // expressions to be temporaries. + // It is uncertain if this is enough to support invariant in all possible cases, but it should be good enough + // for all reasonable uses of invariant. + if (!has_decoration(store_id, DecorationInvariant)) + return; + + auto *expr = maybe_get<SPIRExpression>(value_id); + if (!expr) + return; + + disallow_forwarding_in_expression_chain(*expr); +} + +void CompilerGLSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) +{ + auto rhs = to_pointer_expression(rhs_expression); + + // Statements to OpStore may be empty if it is a struct with zero members. Just forward the store to /dev/null. + if (!rhs.empty()) + { + handle_store_to_invariant_variable(lhs_expression, rhs_expression); + + if (!unroll_array_to_complex_store(lhs_expression, rhs_expression)) + { + auto lhs = to_dereferenced_expression(lhs_expression); + if (has_decoration(lhs_expression, DecorationNonUniform)) + convert_non_uniform_expression(lhs, lhs_expression); + + // We might need to cast in order to store to a builtin. + cast_to_variable_store(lhs_expression, rhs, expression_type(rhs_expression)); + + // Tries to optimize assignments like "<lhs> = <lhs> op expr". + // While this is purely cosmetic, this is important for legacy ESSL where loop + // variable increments must be in either i++ or i += const-expr. + // Without this, we end up with i = i + 1, which is correct GLSL, but not correct GLES 2.0. + if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } + register_write(lhs_expression); + } +} + +uint32_t CompilerGLSL::get_integer_width_for_instruction(const Instruction &instr) const +{ + if (instr.length < 3) + return 32; + + auto *ops = stream(instr); + + switch (instr.op) + { + case OpSConvert: + case OpConvertSToF: + case OpUConvert: + case OpConvertUToF: + case OpIEqual: + case OpINotEqual: + case OpSLessThan: + case OpSLessThanEqual: + case OpSGreaterThan: + case OpSGreaterThanEqual: + case OpULessThan: + case OpULessThanEqual: + case OpUGreaterThan: + case OpUGreaterThanEqual: + return expression_type(ops[2]).width; + + case OpSMulExtended: + case OpUMulExtended: + return get<SPIRType>(get<SPIRType>(ops[0]).member_types[0]).width; + + default: + { + // We can look at result type which is more robust. + auto *type = maybe_get<SPIRType>(ops[0]); + if (type && type_is_integral(*type)) + return type->width; + else + return 32; + } + } +} + +uint32_t CompilerGLSL::get_integer_width_for_glsl_instruction(GLSLstd450 op, const uint32_t *ops, uint32_t length) const +{ + if (length < 1) + return 32; + + switch (op) + { + case GLSLstd450SAbs: + case GLSLstd450SSign: + case GLSLstd450UMin: + case GLSLstd450SMin: + case GLSLstd450UMax: + case GLSLstd450SMax: + case GLSLstd450UClamp: + case GLSLstd450SClamp: + case GLSLstd450FindSMsb: + case GLSLstd450FindUMsb: + return expression_type(ops[0]).width; + + default: + { + // We don't need to care about other opcodes, just return 32. + return 32; + } + } +} + +void CompilerGLSL::forward_relaxed_precision(uint32_t dst_id, const uint32_t *args, uint32_t length) +{ + // Only GLSL supports RelaxedPrecision directly. + // We cannot implement this in HLSL or MSL because it is tied to the type system. + // In SPIR-V, everything must masquerade as 32-bit. + if (!backend.requires_relaxed_precision_analysis) + return; + + auto input_precision = analyze_expression_precision(args, length); + + // For expressions which are loaded or directly forwarded, we inherit mediump implicitly. + // For dst_id to be analyzed properly, it must inherit any relaxed precision decoration from src_id. + if (input_precision == Options::Mediump) + set_decoration(dst_id, DecorationRelaxedPrecision); +} + +CompilerGLSL::Options::Precision CompilerGLSL::analyze_expression_precision(const uint32_t *args, uint32_t length) const +{ + // Now, analyze the precision at which the arguments would run. + // GLSL rules are such that the precision used to evaluate an expression is equal to the highest precision + // for the inputs. Constants do not have inherent precision and do not contribute to this decision. + // If all inputs are constants, they inherit precision from outer expressions, including an l-value. + // In this case, we'll have to force a temporary for dst_id so that we can bind the constant expression with + // correct precision. + bool expression_has_highp = false; + bool expression_has_mediump = false; + + for (uint32_t i = 0; i < length; i++) + { + uint32_t arg = args[i]; + + auto handle_type = ir.ids[arg].get_type(); + if (handle_type == TypeConstant || handle_type == TypeConstantOp || handle_type == TypeUndef) + continue; + + if (has_decoration(arg, DecorationRelaxedPrecision)) + expression_has_mediump = true; + else + expression_has_highp = true; + } + + if (expression_has_highp) + return Options::Highp; + else if (expression_has_mediump) + return Options::Mediump; + else + return Options::DontCare; +} + +void CompilerGLSL::analyze_precision_requirements(uint32_t type_id, uint32_t dst_id, uint32_t *args, uint32_t length) +{ + if (!backend.requires_relaxed_precision_analysis) + return; + + auto &type = get<SPIRType>(type_id); + + // RelaxedPrecision only applies to 32-bit values. + if (type.basetype != SPIRType::Float && type.basetype != SPIRType::Int && type.basetype != SPIRType::UInt) + return; + + bool operation_is_highp = !has_decoration(dst_id, DecorationRelaxedPrecision); + + auto input_precision = analyze_expression_precision(args, length); + if (input_precision == Options::DontCare) + { + consume_temporary_in_precision_context(type_id, dst_id, input_precision); + return; + } + + // In SPIR-V and GLSL, the semantics are flipped for how relaxed precision is determined. + // In SPIR-V, the operation itself marks RelaxedPrecision, meaning that inputs can be truncated to 16-bit. + // However, if the expression is not, inputs must be expanded to 32-bit first, + // since the operation must run at high precision. + // This is the awkward part, because if we have mediump inputs, or expressions which derived from mediump, + // we might have to forcefully bind the source IDs to highp temporaries. This is done by clearing decorations + // and forcing temporaries. Similarly for mediump operations. We bind highp expressions to mediump variables. + if ((operation_is_highp && input_precision == Options::Mediump) || + (!operation_is_highp && input_precision == Options::Highp)) + { + auto precision = operation_is_highp ? Options::Highp : Options::Mediump; + for (uint32_t i = 0; i < length; i++) + { + // Rewrites the opcode so that we consume an ID in correct precision context. + // This is pretty hacky, but it's the most straight forward way of implementing this without adding + // lots of extra passes to rewrite all code blocks. + args[i] = consume_temporary_in_precision_context(expression_type_id(args[i]), args[i], precision); + } + } +} + +// This is probably not exhaustive ... +static bool opcode_is_precision_sensitive_operation(Op op) +{ + switch (op) + { + case OpFAdd: + case OpFSub: + case OpFMul: + case OpFNegate: + case OpIAdd: + case OpISub: + case OpIMul: + case OpSNegate: + case OpFMod: + case OpFDiv: + case OpFRem: + case OpSMod: + case OpSDiv: + case OpSRem: + case OpUMod: + case OpUDiv: + case OpVectorTimesMatrix: + case OpMatrixTimesVector: + case OpMatrixTimesMatrix: + case OpDPdx: + case OpDPdy: + case OpDPdxCoarse: + case OpDPdyCoarse: + case OpDPdxFine: + case OpDPdyFine: + case OpFwidth: + case OpFwidthCoarse: + case OpFwidthFine: + case OpVectorTimesScalar: + case OpMatrixTimesScalar: + case OpOuterProduct: + case OpFConvert: + case OpSConvert: + case OpUConvert: + case OpConvertSToF: + case OpConvertUToF: + case OpConvertFToU: + case OpConvertFToS: + return true; + + default: + return false; + } +} + +// Instructions which just load data but don't do any arithmetic operation should just inherit the decoration. +// SPIR-V doesn't require this, but it's somewhat implied it has to work this way, relaxed precision is only +// relevant when operating on the IDs, not when shuffling things around. +static bool opcode_is_precision_forwarding_instruction(Op op, uint32_t &arg_count) +{ + switch (op) + { + case OpLoad: + case OpAccessChain: + case OpInBoundsAccessChain: + case OpCompositeExtract: + case OpVectorExtractDynamic: + case OpSampledImage: + case OpImage: + case OpCopyObject: + + case OpImageRead: + case OpImageFetch: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageGather: + case OpImageDrefGather: + case OpImageSparseRead: + case OpImageSparseFetch: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleDrefExplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + case OpImageSparseGather: + case OpImageSparseDrefGather: + arg_count = 1; + return true; + + case OpVectorShuffle: + arg_count = 2; + return true; + + case OpCompositeConstruct: + return true; + + default: + break; + } + + return false; +} + +CompilerGLSL::TemporaryCopy CompilerGLSL::handle_instruction_precision(const Instruction &instruction) +{ + auto ops = stream_mutable(instruction); + auto opcode = static_cast<Op>(instruction.op); + uint32_t length = instruction.length; + + if (backend.requires_relaxed_precision_analysis) + { + if (length > 2) + { + uint32_t forwarding_length = length - 2; + + if (opcode_is_precision_sensitive_operation(opcode)) + analyze_precision_requirements(ops[0], ops[1], &ops[2], forwarding_length); + else if (opcode == OpExtInst && length >= 5 && get<SPIRExtension>(ops[2]).ext == SPIRExtension::GLSL) + analyze_precision_requirements(ops[0], ops[1], &ops[4], forwarding_length - 2); + else if (opcode_is_precision_forwarding_instruction(opcode, forwarding_length)) + forward_relaxed_precision(ops[1], &ops[2], forwarding_length); + } + + uint32_t result_type = 0, result_id = 0; + if (instruction_to_result_type(result_type, result_id, opcode, ops, length)) + { + auto itr = temporary_to_mirror_precision_alias.find(ops[1]); + if (itr != temporary_to_mirror_precision_alias.end()) + return { itr->second, itr->first }; + } + } + + return {}; +} + +void CompilerGLSL::emit_instruction(const Instruction &instruction) +{ + auto ops = stream(instruction); + auto opcode = static_cast<Op>(instruction.op); + uint32_t length = instruction.length; + +#define GLSL_BOP(op) emit_binary_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_BOP_CAST(op, type) \ + emit_binary_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, \ + opcode_is_sign_invariant(opcode), implicit_integer_promotion) +#define GLSL_UOP(op) emit_unary_op(ops[0], ops[1], ops[2], #op) +#define GLSL_UOP_CAST(op) emit_unary_op_cast(ops[0], ops[1], ops[2], #op) +#define GLSL_QFOP(op) emit_quaternary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], #op) +#define GLSL_TFOP(op) emit_trinary_func_op(ops[0], ops[1], ops[2], ops[3], ops[4], #op) +#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_BFOP_CAST(op, type) \ + emit_binary_func_op_cast(ops[0], ops[1], ops[2], ops[3], #op, type, opcode_is_sign_invariant(opcode)) +#define GLSL_BFOP(op) emit_binary_func_op(ops[0], ops[1], ops[2], ops[3], #op) +#define GLSL_UFOP(op) emit_unary_func_op(ops[0], ops[1], ops[2], #op) + + // If we need to do implicit bitcasts, make sure we do it with the correct type. + uint32_t integer_width = get_integer_width_for_instruction(instruction); + auto int_type = to_signed_basetype(integer_width); + auto uint_type = to_unsigned_basetype(integer_width); + + // Handle C implicit integer promotion rules. + // If we get implicit promotion to int, need to make sure we cast by value to intended return type, + // otherwise, future sign-dependent operations and bitcasts will break. + bool implicit_integer_promotion = integer_width < 32 && backend.implicit_c_integer_promotion_rules && + opcode_can_promote_integer_implicitly(opcode) && + get<SPIRType>(ops[0]).vecsize == 1; + + opcode = get_remapped_spirv_op(opcode); + + switch (opcode) + { + // Dealing with memory + case OpLoad: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + + flush_variable_declaration(ptr); + + // If we're loading from memory that cannot be changed by the shader, + // just forward the expression directly to avoid needless temporaries. + // If an expression is mutable and forwardable, we speculate that it is immutable. + bool forward = should_forward(ptr) && forced_temporaries.find(id) == end(forced_temporaries); + + // If loading a non-native row-major matrix, mark the expression as need_transpose. + bool need_transpose = false; + bool old_need_transpose = false; + + auto *ptr_expression = maybe_get<SPIRExpression>(ptr); + + if (forward) + { + // If we're forwarding the load, we're also going to forward transpose state, so don't transpose while + // taking the expression. + if (ptr_expression && ptr_expression->need_transpose) + { + old_need_transpose = true; + ptr_expression->need_transpose = false; + need_transpose = true; + } + else if (is_non_native_row_major_matrix(ptr)) + need_transpose = true; + } + + // If we are forwarding this load, + // don't register the read to access chain here, defer that to when we actually use the expression, + // using the add_implied_read_expression mechanism. + string expr; + + bool is_packed = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked); + bool is_remapped = has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID); + if (forward || (!is_packed && !is_remapped)) + { + // For the simple case, we do not need to deal with repacking. + expr = to_dereferenced_expression(ptr, false); + } + else + { + // If we are not forwarding the expression, we need to unpack and resolve any physical type remapping here before + // storing the expression to a temporary. + expr = to_unpacked_expression(ptr); + } + + auto &type = get<SPIRType>(result_type); + auto &expr_type = expression_type(ptr); + + // If the expression has more vector components than the result type, insert + // a swizzle. This shouldn't happen normally on valid SPIR-V, but it might + // happen with e.g. the MSL backend replacing the type of an input variable. + if (expr_type.vecsize > type.vecsize) + expr = enclose_expression(expr + vector_swizzle(type.vecsize, 0)); + + if (forward && ptr_expression) + ptr_expression->need_transpose = old_need_transpose; + + // We might need to cast in order to load from a builtin. + cast_from_variable_load(ptr, expr, type); + + if (forward && ptr_expression) + ptr_expression->need_transpose = false; + + // We might be trying to load a gl_Position[N], where we should be + // doing float4[](gl_in[i].gl_Position, ...) instead. + // Similar workarounds are required for input arrays in tessellation. + // Also, loading from gl_SampleMask array needs special unroll. + unroll_array_from_complex_load(id, ptr, expr); + + if (!type_is_opaque_value(type) && has_decoration(ptr, DecorationNonUniform)) + { + // If we're loading something non-opaque, we need to handle non-uniform descriptor access. + convert_non_uniform_expression(expr, ptr); + } + + if (forward && ptr_expression) + ptr_expression->need_transpose = old_need_transpose; + + bool flattened = ptr_expression && flattened_buffer_blocks.count(ptr_expression->loaded_from) != 0; + + if (backend.needs_row_major_load_workaround && !is_non_native_row_major_matrix(ptr) && !flattened) + rewrite_load_for_wrapped_row_major(expr, result_type, ptr); + + // By default, suppress usage tracking since using same expression multiple times does not imply any extra work. + // However, if we try to load a complex, composite object from a flattened buffer, + // we should avoid emitting the same code over and over and lower the result to a temporary. + bool usage_tracking = flattened && (type.basetype == SPIRType::Struct || (type.columns > 1)); + + SPIRExpression *e = nullptr; + if (!forward && expression_is_non_value_type_array(ptr)) + { + // Complicated load case where we need to make a copy of ptr, but we cannot, because + // it is an array, and our backend does not support arrays as value types. + // Emit the temporary, and copy it explicitly. + e = &emit_uninitialized_temporary_expression(result_type, id); + emit_array_copy(nullptr, id, ptr, StorageClassFunction, get_expression_effective_storage_class(ptr)); + } + else + e = &emit_op(result_type, id, expr, forward, !usage_tracking); + + e->need_transpose = need_transpose; + register_read(id, ptr, forward); + + if (forward) + { + // Pass through whether the result is of a packed type and the physical type ID. + if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypePacked)) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (has_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)) + { + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, + get_extended_decoration(ptr, SPIRVCrossDecorationPhysicalTypeID)); + } + } + else + { + // This might have been set on an earlier compilation iteration, force it to be unset. + unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + unset_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID); + } + + inherit_expression_dependencies(id, ptr); + if (forward) + add_implied_read_expression(*e, ptr); + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + auto *var = maybe_get<SPIRVariable>(ops[2]); + if (var) + flush_variable_declaration(var->self); + + // If the base is immutable, the access chain pointer must also be. + // If an expression is mutable and forwardable, we speculate that it is immutable. + AccessChainMeta meta; + bool ptr_chain = opcode == OpPtrAccessChain; + auto &target_type = get<SPIRType>(ops[0]); + auto e = access_chain(ops[2], &ops[3], length - 3, target_type, &meta, ptr_chain); + + // If the base is flattened UBO of struct type, the expression has to be a composite. + // In that case, backends which do not support inline syntax need it to be bound to a temporary. + // Otherwise, invalid expressions like ({UBO[0].xyz, UBO[0].w, UBO[1]}).member are emitted. + bool requires_temporary = false; + if (flattened_buffer_blocks.count(ops[2]) && target_type.basetype == SPIRType::Struct) + requires_temporary = !backend.can_declare_struct_inline; + + auto &expr = requires_temporary ? + emit_op(ops[0], ops[1], std::move(e), false) : + set<SPIRExpression>(ops[1], std::move(e), ops[0], should_forward(ops[2])); + + auto *backing_variable = maybe_get_backing_variable(ops[2]); + expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]); + expr.need_transpose = meta.need_transpose; + expr.access_chain = true; + expr.access_meshlet_position_y = meta.access_meshlet_position_y; + + // Mark the result as being packed. Some platforms handled packed vectors differently than non-packed. + if (meta.storage_is_packed) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(ops[1], SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); + if (meta.storage_is_invariant) + set_decoration(ops[1], DecorationInvariant); + if (meta.flattened_struct) + flattened_structs[ops[1]] = true; + if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) + set_decoration(ops[1], DecorationRelaxedPrecision); + + // If we have some expression dependencies in our access chain, this access chain is technically a forwarded + // temporary which could be subject to invalidation. + // Need to assume we're forwarded while calling inherit_expression_depdendencies. + forwarded_temporaries.insert(ops[1]); + // The access chain itself is never forced to a temporary, but its dependencies might. + suppressed_usage_tracking.insert(ops[1]); + + for (uint32_t i = 2; i < length; i++) + { + inherit_expression_dependencies(ops[1], ops[i]); + add_implied_read_expression(expr, ops[i]); + } + + // If we have no dependencies after all, i.e., all indices in the access chain are immutable temporaries, + // we're not forwarded after all. + if (expr.expression_dependencies.empty()) + forwarded_temporaries.erase(ops[1]); + + break; + } + + case OpStore: + { + auto *var = maybe_get<SPIRVariable>(ops[0]); + + if (var && var->statically_assigned) + var->static_expression = ops[1]; + else if (var && var->loop_variable && !var->loop_variable_enable) + var->static_expression = ops[1]; + else if (var && var->remapped_variable && var->static_expression) + { + // Skip the write. + } + else if (flattened_structs.count(ops[0])) + { + store_flattened_struct(ops[0], ops[1]); + register_write(ops[0]); + } + else + { + emit_store_statement(ops[0], ops[1]); + } + + // Storing a pointer results in a variable pointer, so we must conservatively assume + // we can write through it. + if (expression_type(ops[1]).pointer) + register_write(ops[1]); + break; + } + + case OpArrayLength: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto e = access_chain_internal(ops[2], &ops[3], length - 3, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + if (has_decoration(ops[2], DecorationNonUniform)) + convert_non_uniform_expression(e, ops[2]); + set<SPIRExpression>(id, join(type_to_glsl(get<SPIRType>(result_type)), "(", e, ".length())"), result_type, + true); + break; + } + + // Function calls + case OpFunctionCall: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t func = ops[2]; + const auto *arg = &ops[3]; + length -= 3; + + auto &callee = get<SPIRFunction>(func); + auto &return_type = get<SPIRType>(callee.return_type); + bool pure = function_is_pure(callee); + bool control_dependent = function_is_control_dependent(callee); + + bool callee_has_out_variables = false; + bool emit_return_value_as_argument = false; + + // Invalidate out variables passed to functions since they can be OpStore'd to. + for (uint32_t i = 0; i < length; i++) + { + if (callee.arguments[i].write_count) + { + register_call_out_argument(arg[i]); + callee_has_out_variables = true; + } + + flush_variable_declaration(arg[i]); + } + + if (!return_type.array.empty() && !backend.can_return_array) + { + callee_has_out_variables = true; + emit_return_value_as_argument = true; + } + + if (!pure) + register_impure_function_call(); + + string funexpr; + SmallVector<string> arglist; + funexpr += to_name(func) + "("; + + if (emit_return_value_as_argument) + { + statement(type_to_glsl(return_type), " ", to_name(id), type_to_array_glsl(return_type, 0), ";"); + arglist.push_back(to_name(id)); + } + + for (uint32_t i = 0; i < length; i++) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg[i])) + continue; + + arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i])); + } + + for (auto &combined : callee.combined_parameters) + { + auto image_id = combined.global_image ? combined.image_id : VariableID(arg[combined.image_id]); + auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]); + arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); + } + + append_global_func_args(callee, length, arglist); + + funexpr += merge(arglist); + funexpr += ")"; + + // Check for function call constraints. + check_function_call_constraints(arg, length); + + if (return_type.basetype != SPIRType::Void) + { + // If the function actually writes to an out variable, + // take the conservative route and do not forward. + // The problem is that we might not read the function + // result (and emit the function) before an out variable + // is read (common case when return value is ignored! + // In order to avoid start tracking invalid variables, + // just avoid the forwarding problem altogether. + bool forward = args_will_forward(id, arg, length, pure) && !callee_has_out_variables && pure && + (forced_temporaries.find(id) == end(forced_temporaries)); + + if (emit_return_value_as_argument) + { + statement(funexpr, ";"); + set<SPIRExpression>(id, to_name(id), result_type, true); + } + else + emit_op(result_type, id, funexpr, forward); + + // Function calls are implicit loads from all variables in question. + // Set dependencies for them. + for (uint32_t i = 0; i < length; i++) + register_read(id, arg[i], forward); + + // If we're going to forward the temporary result, + // put dependencies on every variable that must not change. + if (forward) + register_global_read_dependencies(callee, id); + } + else + statement(funexpr, ";"); + + if (control_dependent) + register_control_dependent_expression(id); + + break; + } + + // Composite munging + case OpCompositeConstruct: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + const auto *const elems = &ops[2]; + length -= 2; + + bool forward = true; + for (uint32_t i = 0; i < length; i++) + forward = forward && should_forward(elems[i]); + + auto &out_type = get<SPIRType>(result_type); + auto *in_type = length > 0 ? &expression_type(elems[0]) : nullptr; + + // Only splat if we have vector constructors. + // Arrays and structs must be initialized properly in full. + bool composite = !out_type.array.empty() || out_type.basetype == SPIRType::Struct; + + bool splat = false; + bool swizzle_splat = false; + + if (in_type) + { + splat = in_type->vecsize == 1 && in_type->columns == 1 && !composite && backend.use_constructor_splatting; + swizzle_splat = in_type->vecsize == 1 && in_type->columns == 1 && backend.can_swizzle_scalar; + + if (ir.ids[elems[0]].get_type() == TypeConstant && !type_is_floating_point(*in_type)) + { + // Cannot swizzle literal integers as a special case. + swizzle_splat = false; + } + } + + if (splat || swizzle_splat) + { + uint32_t input = elems[0]; + for (uint32_t i = 0; i < length; i++) + { + if (input != elems[i]) + { + splat = false; + swizzle_splat = false; + } + } + } + + if (out_type.basetype == SPIRType::Struct && !backend.can_declare_struct_inline) + forward = false; + if (!out_type.array.empty() && !backend.can_declare_arrays_inline) + forward = false; + if (type_is_empty(out_type) && !backend.supports_empty_struct) + forward = false; + + string constructor_op; + if (backend.use_initializer_list && composite) + { + bool needs_trailing_tracket = false; + // Only use this path if we are building composites. + // This path cannot be used for arithmetic. + if (backend.use_typed_initializer_list && out_type.basetype == SPIRType::Struct && out_type.array.empty()) + constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)); + else if (backend.use_typed_initializer_list && backend.array_is_value_type && !out_type.array.empty()) + { + // MSL path. Array constructor is baked into type here, do not use _constructor variant. + constructor_op += type_to_glsl_constructor(get<SPIRType>(result_type)) + "("; + needs_trailing_tracket = true; + } + constructor_op += "{ "; + + if (type_is_empty(out_type) && !backend.supports_empty_struct) + constructor_op += "0"; + else if (splat) + constructor_op += to_unpacked_expression(elems[0]); + else + constructor_op += build_composite_combiner(result_type, elems, length); + constructor_op += " }"; + if (needs_trailing_tracket) + constructor_op += ")"; + } + else if (swizzle_splat && !composite) + { + constructor_op = remap_swizzle(get<SPIRType>(result_type), 1, to_unpacked_expression(elems[0])); + } + else + { + constructor_op = type_to_glsl_constructor(get<SPIRType>(result_type)) + "("; + if (type_is_empty(out_type) && !backend.supports_empty_struct) + constructor_op += "0"; + else if (splat) + constructor_op += to_unpacked_expression(elems[0]); + else + constructor_op += build_composite_combiner(result_type, elems, length); + constructor_op += ")"; + } + + if (!constructor_op.empty()) + { + emit_op(result_type, id, constructor_op, forward); + for (uint32_t i = 0; i < length; i++) + inherit_expression_dependencies(id, elems[i]); + } + break; + } + + case OpVectorInsertDynamic: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec = ops[2]; + uint32_t comp = ops[3]; + uint32_t index = ops[4]; + + flush_variable_declaration(vec); + + // Make a copy, then use access chain to store the variable. + statement(declare_temporary(result_type, id), to_expression(vec), ";"); + set<SPIRExpression>(id, to_name(id), result_type, true); + auto chain = access_chain_internal(id, &index, 1, 0, nullptr); + statement(chain, " = ", to_unpacked_expression(comp), ";"); + break; + } + + case OpVectorExtractDynamic: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto expr = access_chain_internal(ops[2], &ops[3], 1, 0, nullptr); + emit_op(result_type, id, expr, should_forward(ops[2])); + inherit_expression_dependencies(id, ops[2]); + inherit_expression_dependencies(id, ops[3]); + break; + } + + case OpCompositeExtract: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + length -= 3; + + auto &type = get<SPIRType>(result_type); + + // We can only split the expression here if our expression is forwarded as a temporary. + bool allow_base_expression = forced_temporaries.find(id) == end(forced_temporaries); + + // Do not allow base expression for struct members. We risk doing "swizzle" optimizations in this case. + auto &composite_type = expression_type(ops[2]); + bool composite_type_is_complex = composite_type.basetype == SPIRType::Struct || !composite_type.array.empty(); + if (composite_type_is_complex) + allow_base_expression = false; + + // Packed expressions or physical ID mapped expressions cannot be split up. + if (has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypePacked) || + has_extended_decoration(ops[2], SPIRVCrossDecorationPhysicalTypeID)) + allow_base_expression = false; + + // Cannot use base expression for row-major matrix row-extraction since we need to interleave access pattern + // into the base expression. + if (is_non_native_row_major_matrix(ops[2])) + allow_base_expression = false; + + AccessChainMeta meta; + SPIRExpression *e = nullptr; + auto *c = maybe_get<SPIRConstant>(ops[2]); + + if (c && !c->specialization && !composite_type_is_complex) + { + auto expr = to_extract_constant_composite_expression(result_type, *c, ops + 3, length); + e = &emit_op(result_type, id, expr, true, true); + } + else if (allow_base_expression && should_forward(ops[2]) && type.vecsize == 1 && type.columns == 1 && length == 1) + { + // Only apply this optimization if result is scalar. + + // We want to split the access chain from the base. + // This is so we can later combine different CompositeExtract results + // with CompositeConstruct without emitting code like + // + // vec3 temp = texture(...).xyz + // vec4(temp.x, temp.y, temp.z, 1.0). + // + // when we actually wanted to emit this + // vec4(texture(...).xyz, 1.0). + // + // Including the base will prevent this and would trigger multiple reads + // from expression causing it to be forced to an actual temporary in GLSL. + auto expr = access_chain_internal(ops[2], &ops[3], length, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_CHAIN_ONLY_BIT | + ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); + e = &emit_op(result_type, id, expr, true, should_suppress_usage_tracking(ops[2])); + inherit_expression_dependencies(id, ops[2]); + e->base_expression = ops[2]; + + if (meta.relaxed_precision && backend.requires_relaxed_precision_analysis) + set_decoration(ops[1], DecorationRelaxedPrecision); + } + else + { + auto expr = access_chain_internal(ops[2], &ops[3], length, + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT | ACCESS_CHAIN_FORCE_COMPOSITE_BIT, &meta); + e = &emit_op(result_type, id, expr, should_forward(ops[2]), should_suppress_usage_tracking(ops[2])); + inherit_expression_dependencies(id, ops[2]); + } + + // Pass through some meta information to the loaded expression. + // We can still end up loading a buffer type to a variable, then CompositeExtract from it + // instead of loading everything through an access chain. + e->need_transpose = meta.need_transpose; + if (meta.storage_is_packed) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypePacked); + if (meta.storage_physical_type != 0) + set_extended_decoration(id, SPIRVCrossDecorationPhysicalTypeID, meta.storage_physical_type); + if (meta.storage_is_invariant) + set_decoration(id, DecorationInvariant); + + break; + } + + case OpCompositeInsert: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t obj = ops[2]; + uint32_t composite = ops[3]; + const auto *elems = &ops[4]; + length -= 4; + + flush_variable_declaration(composite); + + // CompositeInsert requires a copy + modification, but this is very awkward code in HLL. + // Speculate that the input composite is no longer used, and we can modify it in-place. + // There are various scenarios where this is not possible to satisfy. + bool can_modify_in_place = true; + forced_temporaries.insert(id); + + // Cannot safely RMW PHI variables since they have no way to be invalidated, + // forcing temporaries is not going to help. + // This is similar for Constant and Undef inputs. + // The only safe thing to RMW is SPIRExpression. + // If the expression has already been used (i.e. used in a continue block), we have to keep using + // that loop variable, since we won't be able to override the expression after the fact. + // If the composite is hoisted, we might never be able to properly invalidate any usage + // of that composite in a subsequent loop iteration. + if (invalid_expressions.count(composite) || + block_composite_insert_overwrite.count(composite) || + hoisted_temporaries.count(id) || hoisted_temporaries.count(composite) || + maybe_get<SPIRExpression>(composite) == nullptr) + { + can_modify_in_place = false; + } + else if (backend.requires_relaxed_precision_analysis && + has_decoration(composite, DecorationRelaxedPrecision) != + has_decoration(id, DecorationRelaxedPrecision) && + get<SPIRType>(result_type).basetype != SPIRType::Struct) + { + // Similarly, if precision does not match for input and output, + // we cannot alias them. If we write a composite into a relaxed precision + // ID, we might get a false truncation. + can_modify_in_place = false; + } + + if (can_modify_in_place) + { + // Have to make sure the modified SSA value is bound to a temporary so we can modify it in-place. + if (!forced_temporaries.count(composite)) + force_temporary_and_recompile(composite); + + auto chain = access_chain_internal(composite, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + statement(chain, " = ", to_unpacked_expression(obj), ";"); + set<SPIRExpression>(id, to_expression(composite), result_type, true); + invalid_expressions.insert(composite); + composite_insert_overwritten.insert(composite); + } + else + { + if (maybe_get<SPIRUndef>(composite) != nullptr) + { + emit_uninitialized_temporary_expression(result_type, id); + } + else + { + // Make a copy, then use access chain to store the variable. + statement(declare_temporary(result_type, id), to_expression(composite), ";"); + set<SPIRExpression>(id, to_name(id), result_type, true); + } + + auto chain = access_chain_internal(id, elems, length, ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, nullptr); + statement(chain, " = ", to_unpacked_expression(obj), ";"); + } + + break; + } + + case OpCopyMemory: + { + uint32_t lhs = ops[0]; + uint32_t rhs = ops[1]; + if (lhs != rhs) + { + uint32_t &tmp_id = extra_sub_expressions[instruction.offset | EXTRA_SUB_EXPRESSION_TYPE_STREAM_OFFSET]; + if (!tmp_id) + tmp_id = ir.increase_bound_by(1); + uint32_t tmp_type_id = expression_type(rhs).parent_type; + + EmbeddedInstruction fake_load, fake_store; + fake_load.op = OpLoad; + fake_load.length = 3; + fake_load.ops.push_back(tmp_type_id); + fake_load.ops.push_back(tmp_id); + fake_load.ops.push_back(rhs); + + fake_store.op = OpStore; + fake_store.length = 2; + fake_store.ops.push_back(lhs); + fake_store.ops.push_back(tmp_id); + + // Load and Store do a *lot* of workarounds, and we'd like to reuse them as much as possible. + // Synthesize a fake Load and Store pair for CopyMemory. + emit_instruction(fake_load); + emit_instruction(fake_store); + } + break; + } + + case OpCopyLogical: + { + // This is used for copying object of different types, arrays and structs. + // We need to unroll the copy, element-by-element. + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t rhs = ops[2]; + + emit_uninitialized_temporary_expression(result_type, id); + emit_copy_logical_type(id, result_type, rhs, expression_type_id(rhs), {}); + break; + } + + case OpCopyObject: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t rhs = ops[2]; + bool pointer = get<SPIRType>(result_type).pointer; + + auto *chain = maybe_get<SPIRAccessChain>(rhs); + auto *imgsamp = maybe_get<SPIRCombinedImageSampler>(rhs); + if (chain) + { + // Cannot lower to a SPIRExpression, just copy the object. + auto &e = set<SPIRAccessChain>(id, *chain); + e.self = id; + } + else if (imgsamp) + { + // Cannot lower to a SPIRExpression, just copy the object. + // GLSL does not currently use this type and will never get here, but MSL does. + // Handled here instead of CompilerMSL for better integration and general handling, + // and in case GLSL or other subclasses require it in the future. + auto &e = set<SPIRCombinedImageSampler>(id, *imgsamp); + e.self = id; + } + else if (expression_is_lvalue(rhs) && !pointer) + { + // Need a copy. + // For pointer types, we copy the pointer itself. + emit_op(result_type, id, to_unpacked_expression(rhs), false); + } + else + { + // RHS expression is immutable, so just forward it. + // Copying these things really make no sense, but + // seems to be allowed anyways. + auto &e = emit_op(result_type, id, to_expression(rhs), true, true); + if (pointer) + { + auto *var = maybe_get_backing_variable(rhs); + e.loaded_from = var ? var->self : ID(0); + } + + // If we're copying an access chain, need to inherit the read expressions. + auto *rhs_expr = maybe_get<SPIRExpression>(rhs); + if (rhs_expr) + { + e.implied_read_expressions = rhs_expr->implied_read_expressions; + e.expression_dependencies = rhs_expr->expression_dependencies; + } + } + break; + } + + case OpVectorShuffle: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t vec0 = ops[2]; + uint32_t vec1 = ops[3]; + const auto *elems = &ops[4]; + length -= 4; + + auto &type0 = expression_type(vec0); + + // If we have the undefined swizzle index -1, we need to swizzle in undefined data, + // or in our case, T(0). + bool shuffle = false; + for (uint32_t i = 0; i < length; i++) + if (elems[i] >= type0.vecsize || elems[i] == 0xffffffffu) + shuffle = true; + + // Cannot use swizzles with packed expressions, force shuffle path. + if (!shuffle && has_extended_decoration(vec0, SPIRVCrossDecorationPhysicalTypePacked)) + shuffle = true; + + string expr; + bool should_fwd, trivial_forward; + + if (shuffle) + { + should_fwd = should_forward(vec0) && should_forward(vec1); + trivial_forward = should_suppress_usage_tracking(vec0) && should_suppress_usage_tracking(vec1); + + // Constructor style and shuffling from two different vectors. + SmallVector<string> args; + for (uint32_t i = 0; i < length; i++) + { + if (elems[i] == 0xffffffffu) + { + // Use a constant 0 here. + // We could use the first component or similar, but then we risk propagating + // a value we might not need, and bog down codegen. + SPIRConstant c; + c.constant_type = type0.parent_type; + assert(type0.parent_type != ID(0)); + args.push_back(constant_expression(c)); + } + else if (elems[i] >= type0.vecsize) + args.push_back(to_extract_component_expression(vec1, elems[i] - type0.vecsize)); + else + args.push_back(to_extract_component_expression(vec0, elems[i])); + } + expr += join(type_to_glsl_constructor(get<SPIRType>(result_type)), "(", merge(args), ")"); + } + else + { + should_fwd = should_forward(vec0); + trivial_forward = should_suppress_usage_tracking(vec0); + + // We only source from first vector, so can use swizzle. + // If the vector is packed, unpack it before applying a swizzle (needed for MSL) + expr += to_enclosed_unpacked_expression(vec0); + expr += "."; + for (uint32_t i = 0; i < length; i++) + { + assert(elems[i] != 0xffffffffu); + expr += index_to_swizzle(elems[i]); + } + + if (backend.swizzle_is_function && length > 1) + expr += "()"; + } + + // A shuffle is trivial in that it doesn't actually *do* anything. + // We inherit the forwardedness from our arguments to avoid flushing out to temporaries when it's not really needed. + + emit_op(result_type, id, expr, should_fwd, trivial_forward); + + inherit_expression_dependencies(id, vec0); + if (vec0 != vec1) + inherit_expression_dependencies(id, vec1); + break; + } + + // ALU + case OpIsNan: + if (!is_legacy()) + GLSL_UFOP(isnan); + else + { + // Check if the number doesn't equal itself + auto &type = get<SPIRType>(ops[0]); + if (type.vecsize > 1) + emit_binary_func_op(ops[0], ops[1], ops[2], ops[2], "notEqual"); + else + emit_binary_op(ops[0], ops[1], ops[2], ops[2], "!="); + } + break; + + case OpIsInf: + if (!is_legacy()) + GLSL_UFOP(isinf); + else + { + // inf * 2 == inf by IEEE 754 rules, note this also applies to 0.0 + // This is more reliable than checking if product with zero is NaN + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t operand = ops[2]; + + auto &type = get<SPIRType>(result_type); + std::string expr; + if (type.vecsize > 1) + { + expr = type_to_glsl_constructor(type); + expr += '('; + for (uint32_t i = 0; i < type.vecsize; i++) + { + auto comp = to_extract_component_expression(operand, i); + expr += join(comp, " != 0.0 && 2.0 * ", comp, " == ", comp); + + if (i + 1 < type.vecsize) + expr += ", "; + } + expr += ')'; + } + else + { + // Register an extra read to force writing out a temporary + auto oper = to_enclosed_expression(operand); + track_expression_read(operand); + expr += join(oper, " != 0.0 && 2.0 * ", oper, " == ", oper); + } + emit_op(result_type, result_id, expr, should_forward(operand)); + + inherit_expression_dependencies(result_id, operand); + } + break; + + case OpSNegate: + if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0]) + GLSL_UOP_CAST(-); + else + GLSL_UOP(-); + break; + + case OpFNegate: + GLSL_UOP(-); + break; + + case OpIAdd: + { + // For simple arith ops, prefer the output type if there's a mismatch to avoid extra bitcasts. + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(+, type); + break; + } + + case OpFAdd: + GLSL_BOP(+); + break; + + case OpISub: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(-, type); + break; + } + + case OpFSub: + GLSL_BOP(-); + break; + + case OpIMul: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(*, type); + break; + } + + case OpVectorTimesMatrix: + case OpMatrixTimesVector: + { + // If the matrix needs transpose, just flip the multiply order. + auto *e = maybe_get<SPIRExpression>(ops[opcode == OpMatrixTimesVector ? 2 : 3]); + if (e && e->need_transpose) + { + e->need_transpose = false; + string expr; + + if (opcode == OpMatrixTimesVector) + expr = join(to_enclosed_unpacked_expression(ops[3]), " * ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); + else + expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", + to_enclosed_unpacked_expression(ops[2])); + + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + emit_op(ops[0], ops[1], expr, forward); + e->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + GLSL_BOP(*); + break; + } + + case OpMatrixTimesMatrix: + { + auto *a = maybe_get<SPIRExpression>(ops[2]); + auto *b = maybe_get<SPIRExpression>(ops[3]); + + // If both matrices need transpose, we can multiply in flipped order and tag the expression as transposed. + // a^T * b^T = (b * a)^T. + if (a && b && a->need_transpose && b->need_transpose) + { + a->need_transpose = false; + b->need_transpose = false; + auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[3])), " * ", + enclose_expression(to_unpacked_row_major_matrix_expression(ops[2]))); + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + auto &e = emit_op(ops[0], ops[1], expr, forward); + e.need_transpose = true; + a->need_transpose = true; + b->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + GLSL_BOP(*); + + break; + } + + case OpMatrixTimesScalar: + { + auto *a = maybe_get<SPIRExpression>(ops[2]); + + // If the matrix need transpose, just mark the result as needing so. + if (a && a->need_transpose) + { + a->need_transpose = false; + auto expr = join(enclose_expression(to_unpacked_row_major_matrix_expression(ops[2])), " * ", + to_enclosed_unpacked_expression(ops[3])); + bool forward = should_forward(ops[2]) && should_forward(ops[3]); + auto &e = emit_op(ops[0], ops[1], expr, forward); + e.need_transpose = true; + a->need_transpose = true; + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + } + else + GLSL_BOP(*); + break; + } + + case OpFMul: + case OpVectorTimesScalar: + GLSL_BOP(*); + break; + + case OpOuterProduct: + if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00 + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t a = ops[2]; + uint32_t b = ops[3]; + + auto &type = get<SPIRType>(result_type); + string expr = type_to_glsl_constructor(type); + expr += "("; + for (uint32_t col = 0; col < type.columns; col++) + { + expr += to_enclosed_expression(a); + expr += " * "; + expr += to_extract_component_expression(b, col); + if (col + 1 < type.columns) + expr += ", "; + } + expr += ")"; + emit_op(result_type, id, expr, should_forward(a) && should_forward(b)); + inherit_expression_dependencies(id, a); + inherit_expression_dependencies(id, b); + } + else + GLSL_BFOP(outerProduct); + break; + + case OpDot: + GLSL_BFOP(dot); + break; + + case OpTranspose: + if (options.version < 120) // Matches GLSL 1.10 / ESSL 1.00 + { + // transpose() is not available, so instead, flip need_transpose, + // which can later be turned into an emulated transpose op by + // convert_row_major_matrix(), if necessary. + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t input = ops[2]; + + // Force need_transpose to false temporarily to prevent + // to_expression() from doing the transpose. + bool need_transpose = false; + auto *input_e = maybe_get<SPIRExpression>(input); + if (input_e) + swap(need_transpose, input_e->need_transpose); + + bool forward = should_forward(input); + auto &e = emit_op(result_type, result_id, to_expression(input), forward); + e.need_transpose = !need_transpose; + + // Restore the old need_transpose flag. + if (input_e) + input_e->need_transpose = need_transpose; + } + else + GLSL_UFOP(transpose); + break; + + case OpSRem: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + + // Needs special handling. + bool forward = should_forward(op0) && should_forward(op1); + auto expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + + if (implicit_integer_promotion) + expr = join(type_to_glsl(get<SPIRType>(result_type)), '(', expr, ')'); + + emit_op(result_type, result_id, expr, forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + break; + } + + case OpSDiv: + GLSL_BOP_CAST(/, int_type); + break; + + case OpUDiv: + GLSL_BOP_CAST(/, uint_type); + break; + + case OpIAddCarry: + case OpISubBorrow: + { + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 400."); + + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + auto &type = get<SPIRType>(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + const char *op = opcode == OpIAddCarry ? "uaddCarry" : "usubBorrow"; + + statement(to_expression(result_id), ".", to_member_name(type, 0), " = ", op, "(", to_expression(op0), ", ", + to_expression(op1), ", ", to_expression(result_id), ".", to_member_name(type, 1), ");"); + break; + } + + case OpUMulExtended: + case OpSMulExtended: + { + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Extended arithmetic is only available from ESSL 310."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Extended arithmetic is only available from GLSL 4000."); + + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + auto &type = get<SPIRType>(result_type); + emit_uninitialized_temporary_expression(result_type, result_id); + const char *op = opcode == OpUMulExtended ? "umulExtended" : "imulExtended"; + + statement(op, "(", to_expression(op0), ", ", to_expression(op1), ", ", to_expression(result_id), ".", + to_member_name(type, 1), ", ", to_expression(result_id), ".", to_member_name(type, 0), ");"); + break; + } + + case OpFDiv: + GLSL_BOP(/); + break; + + case OpShiftRightLogical: + GLSL_BOP_CAST(>>, uint_type); + break; + + case OpShiftRightArithmetic: + GLSL_BOP_CAST(>>, int_type); + break; + + case OpShiftLeftLogical: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(<<, type); + break; + } + + case OpBitwiseOr: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(|, type); + break; + } + + case OpBitwiseXor: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(^, type); + break; + } + + case OpBitwiseAnd: + { + auto type = get<SPIRType>(ops[0]).basetype; + GLSL_BOP_CAST(&, type); + break; + } + + case OpNot: + if (implicit_integer_promotion || expression_type_id(ops[2]) != ops[0]) + GLSL_UOP_CAST(~); + else + GLSL_UOP(~); + break; + + case OpUMod: + GLSL_BOP_CAST(%, uint_type); + break; + + case OpSMod: + GLSL_BOP_CAST(%, int_type); + break; + + case OpFMod: + GLSL_BFOP(mod); + break; + + case OpFRem: + { + uint32_t result_type = ops[0]; + uint32_t result_id = ops[1]; + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + + // Needs special handling. + bool forward = should_forward(op0) && should_forward(op1); + std::string expr; + if (!is_legacy()) + { + expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", "trunc(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), ")"); + } + else + { + // Legacy GLSL has no trunc, emulate by casting to int and back + auto &op0_type = expression_type(op0); + auto via_type = op0_type; + via_type.basetype = SPIRType::Int; + expr = join(to_enclosed_expression(op0), " - ", to_enclosed_expression(op1), " * ", + type_to_glsl(op0_type), "(", type_to_glsl(via_type), "(", + to_enclosed_expression(op0), " / ", to_enclosed_expression(op1), "))"); + } + + emit_op(result_type, result_id, expr, forward); + inherit_expression_dependencies(result_id, op0); + inherit_expression_dependencies(result_id, op1); + break; + } + + // Relational + case OpAny: + GLSL_UFOP(any); + break; + + case OpAll: + GLSL_UFOP(all); + break; + + case OpSelect: + emit_mix_op(ops[0], ops[1], ops[4], ops[3], ops[2]); + break; + + case OpLogicalOr: + { + // No vector variant in GLSL for logical OR. + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get<SPIRType>(result_type); + + if (type.vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "||", false, SPIRType::Unknown); + else + GLSL_BOP(||); + break; + } + + case OpLogicalAnd: + { + // No vector variant in GLSL for logical AND. + auto result_type = ops[0]; + auto id = ops[1]; + auto &type = get<SPIRType>(result_type); + + if (type.vecsize > 1) + emit_unrolled_binary_op(result_type, id, ops[2], ops[3], "&&", false, SPIRType::Unknown); + else + GLSL_BOP(&&); + break; + } + + case OpLogicalNot: + { + auto &type = get<SPIRType>(ops[0]); + if (type.vecsize > 1) + GLSL_UFOP(not ); + else + GLSL_UOP(!); + break; + } + + case OpIEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(equal, int_type); + else + GLSL_BOP_CAST(==, int_type); + break; + } + + case OpLogicalEqual: + case OpFOrdEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(equal); + else + GLSL_BOP(==); + break; + } + + case OpINotEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(notEqual, int_type); + else + GLSL_BOP_CAST(!=, int_type); + break; + } + + case OpLogicalNotEqual: + case OpFOrdNotEqual: + case OpFUnordNotEqual: + { + // GLSL is fuzzy on what to do with ordered vs unordered not equal. + // glslang started emitting UnorderedNotEqual some time ago to harmonize with IEEE, + // but this means we have no easy way of implementing ordered not equal. + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(notEqual); + else + GLSL_BOP(!=); + break; + } + + case OpUGreaterThan: + case OpSGreaterThan: + { + auto type = opcode == OpUGreaterThan ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(greaterThan, type); + else + GLSL_BOP_CAST(>, type); + break; + } + + case OpFOrdGreaterThan: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(greaterThan); + else + GLSL_BOP(>); + break; + } + + case OpUGreaterThanEqual: + case OpSGreaterThanEqual: + { + auto type = opcode == OpUGreaterThanEqual ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(greaterThanEqual, type); + else + GLSL_BOP_CAST(>=, type); + break; + } + + case OpFOrdGreaterThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(greaterThanEqual); + else + GLSL_BOP(>=); + break; + } + + case OpULessThan: + case OpSLessThan: + { + auto type = opcode == OpULessThan ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(lessThan, type); + else + GLSL_BOP_CAST(<, type); + break; + } + + case OpFOrdLessThan: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(lessThan); + else + GLSL_BOP(<); + break; + } + + case OpULessThanEqual: + case OpSLessThanEqual: + { + auto type = opcode == OpULessThanEqual ? uint_type : int_type; + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP_CAST(lessThanEqual, type); + else + GLSL_BOP_CAST(<=, type); + break; + } + + case OpFOrdLessThanEqual: + { + if (expression_type(ops[2]).vecsize > 1) + GLSL_BFOP(lessThanEqual); + else + GLSL_BOP(<=); + break; + } + + // Conversion + case OpSConvert: + case OpConvertSToF: + case OpUConvert: + case OpConvertUToF: + { + auto input_type = opcode == OpSConvert || opcode == OpConvertSToF ? int_type : uint_type; + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto &type = get<SPIRType>(result_type); + auto &arg_type = expression_type(ops[2]); + auto func = type_to_glsl_constructor(type); + + if (arg_type.width < type.width || type_is_floating_point(type)) + emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), input_type, type.basetype); + else + emit_unary_func_op(result_type, id, ops[2], func.c_str()); + break; + } + + case OpConvertFToU: + case OpConvertFToS: + { + // Cast to expected arithmetic type, then potentially bitcast away to desired signedness. + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + auto &type = get<SPIRType>(result_type); + auto expected_type = type; + auto &float_type = expression_type(ops[2]); + expected_type.basetype = + opcode == OpConvertFToS ? to_signed_basetype(type.width) : to_unsigned_basetype(type.width); + + auto func = type_to_glsl_constructor(expected_type); + emit_unary_func_op_cast(result_type, id, ops[2], func.c_str(), float_type.basetype, expected_type.basetype); + break; + } + + case OpFConvert: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto func = type_to_glsl_constructor(get<SPIRType>(result_type)); + emit_unary_func_op(result_type, id, ops[2], func.c_str()); + break; + } + + case OpBitcast: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + if (!emit_complex_bitcast(result_type, id, arg)) + { + auto op = bitcast_glsl_op(get<SPIRType>(result_type), expression_type(arg)); + emit_unary_func_op(result_type, id, arg, op.c_str()); + } + break; + } + + case OpQuantizeToF16: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t arg = ops[2]; + + string op; + auto &type = get<SPIRType>(result_type); + + switch (type.vecsize) + { + case 1: + op = join("unpackHalf2x16(packHalf2x16(vec2(", to_expression(arg), "))).x"); + break; + case 2: + op = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), "))"); + break; + case 3: + { + auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zz)).x"); + op = join("vec3(", op0, ", ", op1, ")"); + break; + } + case 4: + { + auto op0 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".xy))"); + auto op1 = join("unpackHalf2x16(packHalf2x16(", to_expression(arg), ".zw))"); + op = join("vec4(", op0, ", ", op1, ")"); + break; + } + default: + SPIRV_CROSS_THROW("Illegal argument to OpQuantizeToF16."); + } + + emit_op(result_type, id, op, should_forward(arg)); + inherit_expression_dependencies(id, arg); + break; + } + + // Derivatives + case OpDPdx: + GLSL_UFOP(dFdx); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdy: + GLSL_UFOP(dFdy); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxFine: + GLSL_UFOP(dFdxFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyFine: + GLSL_UFOP(dFdyFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdxCoarse: + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + GLSL_UFOP(dFdxCoarse); + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpDPdyCoarse: + GLSL_UFOP(dFdyCoarse); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidth: + GLSL_UFOP(fwidth); + if (is_legacy_es()) + require_extension_internal("GL_OES_standard_derivatives"); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidthCoarse: + GLSL_UFOP(fwidthCoarse); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + case OpFwidthFine: + GLSL_UFOP(fwidthFine); + if (options.es) + { + SPIRV_CROSS_THROW("GL_ARB_derivative_control is unavailable in OpenGL ES."); + } + if (options.version < 450) + require_extension_internal("GL_ARB_derivative_control"); + register_control_dependent_expression(ops[1]); + break; + + // Bitfield + case OpBitFieldInsert: + { + emit_bitfield_insert_op(ops[0], ops[1], ops[2], ops[3], ops[4], ops[5], "bitfieldInsert", SPIRType::Int); + break; + } + + case OpBitFieldSExtract: + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", int_type, int_type, + SPIRType::Int, SPIRType::Int); + break; + } + + case OpBitFieldUExtract: + { + emit_trinary_func_op_bitextract(ops[0], ops[1], ops[2], ops[3], ops[4], "bitfieldExtract", uint_type, uint_type, + SPIRType::Int, SPIRType::Int); + break; + } + + case OpBitReverse: + // BitReverse does not have issues with sign since result type must match input type. + GLSL_UFOP(bitfieldReverse); + break; + + case OpBitCount: + { + auto basetype = expression_type(ops[2]).basetype; + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "bitCount", basetype, int_type); + break; + } + + // Atomics + case OpAtomicExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + // Ignore semantics for now, probably only relevant to CL. + uint32_t val = ops[5]; + const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; + + emit_atomic_func_op(result_type, id, ptr, val, op); + break; + } + + case OpAtomicCompareExchange: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t ptr = ops[2]; + uint32_t val = ops[6]; + uint32_t comp = ops[7]; + const char *op = check_atomic_image(ptr) ? "imageAtomicCompSwap" : "atomicCompSwap"; + + emit_atomic_func_op(result_type, id, ptr, comp, val, op); + break; + } + + case OpAtomicLoad: + { + // In plain GLSL, we have no atomic loads, so emulate this by fetch adding by 0 and hope compiler figures it out. + // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. + auto &type = expression_type(ops[2]); + forced_temporaries.insert(ops[1]); + bool atomic_image = check_atomic_image(ops[2]); + bool unsigned_type = (type.basetype == SPIRType::UInt) || + (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt); + const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; + const char *increment = unsigned_type ? "0u" : "0"; + emit_op(ops[0], ops[1], + join(op, "(", + to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicStore: + { + // In plain GLSL, we have no atomic stores, so emulate this with an atomic exchange where we don't consume the result. + // Alternatively, we could rely on KHR_memory_model, but that's not very helpful for GL. + uint32_t ptr = ops[0]; + // Ignore semantics for now, probably only relevant to CL. + uint32_t val = ops[3]; + const char *op = check_atomic_image(ptr) ? "imageAtomicExchange" : "atomicExchange"; + statement(op, "(", to_non_uniform_aware_expression(ptr), ", ", to_expression(val), ");"); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicIIncrement: + case OpAtomicIDecrement: + { + forced_temporaries.insert(ops[1]); + auto &type = expression_type(ops[2]); + if (type.storage == StorageClassAtomicCounter) + { + // Legacy GLSL stuff, not sure if this is relevant to support. + if (opcode == OpAtomicIIncrement) + GLSL_UFOP(atomicCounterIncrement); + else + GLSL_UFOP(atomicCounterDecrement); + } + else + { + bool atomic_image = check_atomic_image(ops[2]); + bool unsigned_type = (type.basetype == SPIRType::UInt) || + (atomic_image && get<SPIRType>(type.image.type).basetype == SPIRType::UInt); + const char *op = atomic_image ? "imageAtomicAdd" : "atomicAdd"; + + const char *increment = nullptr; + if (opcode == OpAtomicIIncrement && unsigned_type) + increment = "1u"; + else if (opcode == OpAtomicIIncrement) + increment = "1"; + else if (unsigned_type) + increment = "uint(-1)"; + else + increment = "-1"; + + emit_op(ops[0], ops[1], + join(op, "(", to_non_uniform_aware_expression(ops[2]), ", ", increment, ")"), false); + } + + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicIAdd: + case OpAtomicFAddEXT: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); + break; + } + + case OpAtomicISub: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAdd" : "atomicAdd"; + forced_temporaries.insert(ops[1]); + auto expr = join(op, "(", to_non_uniform_aware_expression(ops[2]), ", -", to_enclosed_expression(ops[5]), ")"); + emit_op(ops[0], ops[1], expr, should_forward(ops[2]) && should_forward(ops[5])); + flush_all_atomic_capable_variables(); + break; + } + + case OpAtomicSMin: + case OpAtomicUMin: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMin" : "atomicMin"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); + break; + } + + case OpAtomicSMax: + case OpAtomicUMax: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicMax" : "atomicMax"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); + break; + } + + case OpAtomicAnd: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicAnd" : "atomicAnd"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); + break; + } + + case OpAtomicOr: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicOr" : "atomicOr"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); + break; + } + + case OpAtomicXor: + { + const char *op = check_atomic_image(ops[2]) ? "imageAtomicXor" : "atomicXor"; + emit_atomic_func_op(ops[0], ops[1], ops[2], ops[5], op); + break; + } + + // Geometry shaders + case OpEmitVertex: + statement("EmitVertex();"); + break; + + case OpEndPrimitive: + statement("EndPrimitive();"); + break; + + case OpEmitStreamVertex: + { + if (options.es) + SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400."); + + auto stream_expr = to_expression(ops[0]); + if (expression_type(ops[0]).basetype != SPIRType::Int) + stream_expr = join("int(", stream_expr, ")"); + statement("EmitStreamVertex(", stream_expr, ");"); + break; + } + + case OpEndStreamPrimitive: + { + if (options.es) + SPIRV_CROSS_THROW("Multi-stream geometry shaders not supported in ES."); + else if (!options.es && options.version < 400) + SPIRV_CROSS_THROW("Multi-stream geometry shaders only supported in GLSL 400."); + + auto stream_expr = to_expression(ops[0]); + if (expression_type(ops[0]).basetype != SPIRType::Int) + stream_expr = join("int(", stream_expr, ")"); + statement("EndStreamPrimitive(", stream_expr, ");"); + break; + } + + // Textures + case OpImageSampleExplicitLod: + case OpImageSampleProjExplicitLod: + case OpImageSampleDrefExplicitLod: + case OpImageSampleProjDrefExplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleDrefImplicitLod: + case OpImageSampleProjDrefImplicitLod: + case OpImageFetch: + case OpImageGather: + case OpImageDrefGather: + // Gets a bit hairy, so move this to a separate instruction. + emit_texture_op(instruction, false); + break; + + case OpImageSparseSampleExplicitLod: + case OpImageSparseSampleProjExplicitLod: + case OpImageSparseSampleDrefExplicitLod: + case OpImageSparseSampleProjDrefExplicitLod: + case OpImageSparseSampleImplicitLod: + case OpImageSparseSampleProjImplicitLod: + case OpImageSparseSampleDrefImplicitLod: + case OpImageSparseSampleProjDrefImplicitLod: + case OpImageSparseFetch: + case OpImageSparseGather: + case OpImageSparseDrefGather: + // Gets a bit hairy, so move this to a separate instruction. + emit_texture_op(instruction, true); + break; + + case OpImageSparseTexelsResident: + if (options.es) + SPIRV_CROSS_THROW("Sparse feedback is not supported in GLSL."); + require_extension_internal("GL_ARB_sparse_texture2"); + emit_unary_func_op_cast(ops[0], ops[1], ops[2], "sparseTexelsResidentARB", int_type, SPIRType::Boolean); + break; + + case OpImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + // Suppress usage tracking. + auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); + + // When using the image, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : ID(0); + break; + } + + case OpImageQueryLod: + { + const char *op = nullptr; + if (!options.es && options.version < 400) + { + require_extension_internal("GL_ARB_texture_query_lod"); + // For some reason, the ARB spec is all-caps. + op = "textureQueryLOD"; + } + else if (options.es) + { + if (options.version < 300) + SPIRV_CROSS_THROW("textureQueryLod not supported in legacy ES"); + require_extension_internal("GL_EXT_texture_query_lod"); + op = "textureQueryLOD"; + } + else + op = "textureQueryLod"; + + auto sampler_expr = to_expression(ops[2]); + if (has_decoration(ops[2], DecorationNonUniform)) + { + if (maybe_get_backing_variable(ops[2])) + convert_non_uniform_expression(sampler_expr, ops[2]); + else if (*backend.nonuniform_qualifier != '\0') + sampler_expr = join(backend.nonuniform_qualifier, "(", sampler_expr, ")"); + } + + bool forward = should_forward(ops[3]); + emit_op(ops[0], ops[1], + join(op, "(", sampler_expr, ", ", to_unpacked_expression(ops[3]), ")"), + forward); + inherit_expression_dependencies(ops[1], ops[2]); + inherit_expression_dependencies(ops[1], ops[3]); + register_control_dependent_expression(ops[1]); + break; + } + + case OpImageQueryLevels: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_texture_query_levels"); + if (options.es) + SPIRV_CROSS_THROW("textureQueryLevels not supported in ES profile."); + + auto expr = join("textureQueryLevels(", convert_separate_image_to_expression(ops[2]), ")"); + auto &restype = get<SPIRType>(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpImageQuerySamples: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (options.es) + SPIRV_CROSS_THROW("textureSamples and imageSamples not supported in ES profile."); + else if (options.version < 450) + require_extension_internal("GL_ARB_texture_query_samples"); + + string expr; + if (type.image.sampled == 2) + expr = join("imageSamples(", to_non_uniform_aware_expression(ops[2]), ")"); + else + expr = join("textureSamples(", convert_separate_image_to_expression(ops[2]), ")"); + + auto &restype = get<SPIRType>(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + break; + } + + case OpSampledImage: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_sampled_image_op(result_type, id, ops[2], ops[3]); + inherit_expression_dependencies(id, ops[2]); + inherit_expression_dependencies(id, ops[3]); + break; + } + + case OpImageQuerySizeLod: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + uint32_t img = ops[2]; + auto &type = expression_type(img); + auto &imgtype = get<SPIRType>(type.self); + + std::string fname = "textureSize"; + if (is_legacy_desktop()) + { + fname = legacy_tex_op(fname, imgtype, img); + } + else if (is_legacy_es()) + SPIRV_CROSS_THROW("textureSize is not supported in ESSL 100."); + + auto expr = join(fname, "(", convert_separate_image_to_expression(img), ", ", + bitcast_expression(SPIRType::Int, ops[3]), ")"); + + // ES needs to emulate 1D images as 2D. + if (type.image.dim == Dim1D && options.es) + expr = join(expr, ".x"); + + auto &restype = get<SPIRType>(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + break; + } + + // Image load/store + case OpImageRead: + case OpImageSparseRead: + { + // We added Nonreadable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to read the image after all, remove the qualifier and recompile. + auto *var = maybe_get_backing_variable(ops[2]); + if (var) + { + auto &flags = get_decoration_bitset(var->self); + if (flags.get(DecorationNonReadable)) + { + unset_decoration(var->self, DecorationNonReadable); + force_recompile(); + } + } + + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + bool pure; + string imgexpr; + auto &type = expression_type(ops[2]); + + if (var && var->remapped_variable) // Remapped input, just read as-is without any op-code + { + if (type.image.ms) + SPIRV_CROSS_THROW("Trying to remap multisampled image to variable, this is not possible."); + + auto itr = + find_if(begin(pls_inputs), end(pls_inputs), [var](const PlsRemap &pls) { return pls.id == var->self; }); + + if (itr == end(pls_inputs)) + { + // For non-PLS inputs, we rely on subpass type remapping information to get it right + // since ImageRead always returns 4-component vectors and the backing type is opaque. + if (!var->remapped_components) + SPIRV_CROSS_THROW("subpassInput was remapped, but remap_components is not set correctly."); + imgexpr = remap_swizzle(get<SPIRType>(result_type), var->remapped_components, to_expression(ops[2])); + } + else + { + // PLS input could have different number of components than what the SPIR expects, swizzle to + // the appropriate vector size. + uint32_t components = pls_format_to_components(itr->format); + imgexpr = remap_swizzle(get<SPIRType>(result_type), components, to_expression(ops[2])); + } + pure = true; + } + else if (type.image.dim == DimSubpassData) + { + if (var && subpass_input_is_framebuffer_fetch(var->self)) + { + imgexpr = to_expression(var->self); + } + else if (options.vulkan_semantics) + { + // With Vulkan semantics, use the proper Vulkan GLSL construct. + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ", ", to_expression(samples), ")"); + } + else + imgexpr = join("subpassLoad(", to_non_uniform_aware_expression(ops[2]), ")"); + } + else + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), ", + to_expression(samples), ")"); + } + else + { + // Implement subpass loads via texture barrier style sampling. + imgexpr = join("texelFetch(", to_non_uniform_aware_expression(ops[2]), ", ivec2(gl_FragCoord.xy), 0)"); + } + } + imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr); + pure = true; + } + else + { + bool sparse = opcode == OpImageSparseRead; + uint32_t sparse_code_id = 0; + uint32_t sparse_texel_id = 0; + if (sparse) + emit_sparse_feedback_temporaries(ops[0], ops[1], sparse_code_id, sparse_texel_id); + + // imageLoad only accepts int coords, not uint. + auto coord_expr = to_expression(ops[3]); + auto target_coord_type = expression_type(ops[3]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); + + // ES needs to emulate 1D images as 2D. + if (type.image.dim == Dim1D && options.es) + coord_expr = join("ivec2(", coord_expr, ", 0)"); + + // Plain image load/store. + if (sparse) + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", + coord_expr, ", ", to_expression(samples), ", ", to_expression(sparse_texel_id), ");"); + } + else + { + statement(to_expression(sparse_code_id), " = sparseImageLoadARB(", to_non_uniform_aware_expression(ops[2]), ", ", + coord_expr, ", ", to_expression(sparse_texel_id), ");"); + } + imgexpr = join(type_to_glsl(get<SPIRType>(result_type)), "(", to_expression(sparse_code_id), ", ", + to_expression(sparse_texel_id), ")"); + } + else + { + if (type.image.ms) + { + uint32_t operands = ops[4]; + if (operands != ImageOperandsSampleMask || length != 6) + SPIRV_CROSS_THROW("Multisampled image used in OpImageRead, but unexpected " + "operand mask was used."); + + uint32_t samples = ops[5]; + imgexpr = + join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ", ", to_expression(samples), ")"); + } + else + imgexpr = join("imageLoad(", to_non_uniform_aware_expression(ops[2]), ", ", coord_expr, ")"); + } + + if (!sparse) + imgexpr = remap_swizzle(get<SPIRType>(result_type), 4, imgexpr); + pure = false; + } + + if (var) + { + bool forward = forced_temporaries.find(id) == end(forced_temporaries); + auto &e = emit_op(result_type, id, imgexpr, forward); + + // We only need to track dependencies if we're reading from image load/store. + if (!pure) + { + e.loaded_from = var->self; + if (forward) + var->dependees.push_back(id); + } + } + else + emit_op(result_type, id, imgexpr, false); + + inherit_expression_dependencies(id, ops[2]); + if (type.image.ms) + inherit_expression_dependencies(id, ops[5]); + break; + } + + case OpImageTexelPointer: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + auto coord_expr = to_expression(ops[3]); + auto target_coord_type = expression_type(ops[3]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[3]).basetype, coord_expr); + + auto expr = join(to_expression(ops[2]), ", ", coord_expr); + auto &e = set<SPIRExpression>(id, expr, result_type, true); + + // When using the pointer, we need to know which variable it is actually loaded from. + auto *var = maybe_get_backing_variable(ops[2]); + e.loaded_from = var ? var->self : ID(0); + inherit_expression_dependencies(id, ops[3]); + break; + } + + case OpImageWrite: + { + // We added Nonwritable speculatively to the OpImage variable due to glslangValidator + // not adding the proper qualifiers. + // If it turns out we need to write to the image after all, remove the qualifier and recompile. + auto *var = maybe_get_backing_variable(ops[0]); + if (var) + { + if (has_decoration(var->self, DecorationNonWritable)) + { + unset_decoration(var->self, DecorationNonWritable); + force_recompile(); + } + } + + auto &type = expression_type(ops[0]); + auto &value_type = expression_type(ops[2]); + auto store_type = value_type; + store_type.vecsize = 4; + + // imageStore only accepts int coords, not uint. + auto coord_expr = to_expression(ops[1]); + auto target_coord_type = expression_type(ops[1]); + target_coord_type.basetype = SPIRType::Int; + coord_expr = bitcast_expression(target_coord_type, expression_type(ops[1]).basetype, coord_expr); + + // ES needs to emulate 1D images as 2D. + if (type.image.dim == Dim1D && options.es) + coord_expr = join("ivec2(", coord_expr, ", 0)"); + + if (type.image.ms) + { + uint32_t operands = ops[3]; + if (operands != ImageOperandsSampleMask || length != 5) + SPIRV_CROSS_THROW("Multisampled image used in OpImageWrite, but unexpected operand mask was used."); + uint32_t samples = ops[4]; + statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", to_expression(samples), ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + } + else + statement("imageStore(", to_non_uniform_aware_expression(ops[0]), ", ", coord_expr, ", ", + remap_swizzle(store_type, value_type.vecsize, to_expression(ops[2])), ");"); + + if (var && variable_storage_is_aliased(*var)) + flush_all_aliased_variables(); + break; + } + + case OpImageQuerySize: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.basetype == SPIRType::Image) + { + string expr; + if (type.image.sampled == 2) + { + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_shader_image_size"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for imageSize."); + + // The size of an image is always constant. + expr = join("imageSize(", to_non_uniform_aware_expression(ops[2]), ")"); + } + else + { + // This path is hit for samplerBuffers and multisampled images which do not have LOD. + std::string fname = "textureSize"; + if (is_legacy()) + { + auto &imgtype = get<SPIRType>(type.self); + fname = legacy_tex_op(fname, imgtype, ops[2]); + } + expr = join(fname, "(", convert_separate_image_to_expression(ops[2]), ")"); + } + + auto &restype = get<SPIRType>(ops[0]); + expr = bitcast_expression(restype, SPIRType::Int, expr); + emit_op(result_type, id, expr, true); + } + else + SPIRV_CROSS_THROW("Invalid type for OpImageQuerySize."); + break; + } + + case OpImageSampleWeightedQCOM: + case OpImageBoxFilterQCOM: + case OpImageBlockMatchSSDQCOM: + case OpImageBlockMatchSADQCOM: + { + require_extension_internal("GL_QCOM_image_processing"); + uint32_t result_type_id = ops[0]; + uint32_t id = ops[1]; + string expr; + switch (opcode) + { + case OpImageSampleWeightedQCOM: + expr = "textureWeightedQCOM"; + break; + case OpImageBoxFilterQCOM: + expr = "textureBoxFilterQCOM"; + break; + case OpImageBlockMatchSSDQCOM: + expr = "textureBlockMatchSSDQCOM"; + break; + case OpImageBlockMatchSADQCOM: + expr = "textureBlockMatchSADQCOM"; + break; + default: + SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing."); + } + expr += "("; + + bool forward = false; + expr += to_expression(ops[2]); + expr += ", " + to_expression(ops[3]); + + switch (opcode) + { + case OpImageSampleWeightedQCOM: + expr += ", " + to_non_uniform_aware_expression(ops[4]); + break; + case OpImageBoxFilterQCOM: + expr += ", " + to_expression(ops[4]); + break; + case OpImageBlockMatchSSDQCOM: + case OpImageBlockMatchSADQCOM: + expr += ", " + to_non_uniform_aware_expression(ops[4]); + expr += ", " + to_expression(ops[5]); + expr += ", " + to_expression(ops[6]); + break; + default: + SPIRV_CROSS_THROW("Invalid opcode for QCOM_image_processing."); + } + + expr += ")"; + emit_op(result_type_id, id, expr, forward); + + inherit_expression_dependencies(id, ops[3]); + if (opcode == OpImageBlockMatchSSDQCOM || opcode == OpImageBlockMatchSADQCOM) + inherit_expression_dependencies(id, ops[5]); + + break; + } + + // Compute + case OpControlBarrier: + case OpMemoryBarrier: + { + uint32_t execution_scope = 0; + uint32_t memory; + uint32_t semantics; + + if (opcode == OpMemoryBarrier) + { + memory = evaluate_constant_u32(ops[0]); + semantics = evaluate_constant_u32(ops[1]); + } + else + { + execution_scope = evaluate_constant_u32(ops[0]); + memory = evaluate_constant_u32(ops[1]); + semantics = evaluate_constant_u32(ops[2]); + } + + if (execution_scope == ScopeSubgroup || memory == ScopeSubgroup) + { + // OpControlBarrier with ScopeSubgroup is subgroupBarrier() + if (opcode != OpControlBarrier) + { + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupMemBarrier); + } + else + { + request_subgroup_feature(ShaderSubgroupSupportHelper::SubgroupBarrier); + } + } + + if (execution_scope != ScopeSubgroup && get_entry_point().model == ExecutionModelTessellationControl) + { + // Control shaders only have barriers, and it implies memory barriers. + if (opcode == OpControlBarrier) + statement("barrier();"); + break; + } + + // We only care about these flags, acquire/release and friends are not relevant to GLSL. + semantics = mask_relevant_memory_semantics(semantics); + + if (opcode == OpMemoryBarrier) + { + // If we are a memory barrier, and the next instruction is a control barrier, check if that memory barrier + // does what we need, so we avoid redundant barriers. + const Instruction *next = get_next_instruction_in_block(instruction); + if (next && next->op == OpControlBarrier) + { + auto *next_ops = stream(*next); + uint32_t next_memory = evaluate_constant_u32(next_ops[1]); + uint32_t next_semantics = evaluate_constant_u32(next_ops[2]); + next_semantics = mask_relevant_memory_semantics(next_semantics); + + bool memory_scope_covered = false; + if (next_memory == memory) + memory_scope_covered = true; + else if (next_semantics == MemorySemanticsWorkgroupMemoryMask) + { + // If we only care about workgroup memory, either Device or Workgroup scope is fine, + // scope does not have to match. + if ((next_memory == ScopeDevice || next_memory == ScopeWorkgroup) && + (memory == ScopeDevice || memory == ScopeWorkgroup)) + { + memory_scope_covered = true; + } + } + else if (memory == ScopeWorkgroup && next_memory == ScopeDevice) + { + // The control barrier has device scope, but the memory barrier just has workgroup scope. + memory_scope_covered = true; + } + + // If we have the same memory scope, and all memory types are covered, we're good. + if (memory_scope_covered && (semantics & next_semantics) == semantics) + break; + } + } + + // We are synchronizing some memory or syncing execution, + // so we cannot forward any loads beyond the memory barrier. + if (semantics || opcode == OpControlBarrier) + { + assert(current_emitting_block); + flush_control_dependent_expressions(current_emitting_block->self); + flush_all_active_variables(); + } + + if (memory == ScopeWorkgroup) // Only need to consider memory within a group + { + if (semantics == MemorySemanticsWorkgroupMemoryMask) + { + // OpControlBarrier implies a memory barrier for shared memory as well. + bool implies_shared_barrier = opcode == OpControlBarrier && execution_scope == ScopeWorkgroup; + if (!implies_shared_barrier) + statement("memoryBarrierShared();"); + } + else if (semantics != 0) + statement("groupMemoryBarrier();"); + } + else if (memory == ScopeSubgroup) + { + const uint32_t all_barriers = + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; + + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("subgroupMemoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 3 barriers. + statement("subgroupMemoryBarrier();"); + } + else + { + // Pick out individual barriers. + if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("subgroupMemoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("subgroupMemoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("subgroupMemoryBarrierImage();"); + } + } + else + { + const uint32_t all_barriers = + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | MemorySemanticsImageMemoryMask; + + if (semantics & (MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask)) + { + // These are not relevant for GLSL, but assume it means memoryBarrier(). + // memoryBarrier() does everything, so no need to test anything else. + statement("memoryBarrier();"); + } + else if ((semantics & all_barriers) == all_barriers) + { + // Short-hand instead of emitting 4 barriers. + statement("memoryBarrier();"); + } + else + { + // Pick out individual barriers. + if (semantics & MemorySemanticsWorkgroupMemoryMask) + statement("memoryBarrierShared();"); + if (semantics & MemorySemanticsUniformMemoryMask) + statement("memoryBarrierBuffer();"); + if (semantics & MemorySemanticsImageMemoryMask) + statement("memoryBarrierImage();"); + } + } + + if (opcode == OpControlBarrier) + { + if (execution_scope == ScopeSubgroup) + statement("subgroupBarrier();"); + else + statement("barrier();"); + } + break; + } + + case OpExtInst: + { + uint32_t extension_set = ops[2]; + auto ext = get<SPIRExtension>(extension_set).ext; + + if (ext == SPIRExtension::GLSL) + { + emit_glsl_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (ext == SPIRExtension::SPV_AMD_shader_ballot) + { + emit_spv_amd_shader_ballot_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (ext == SPIRExtension::SPV_AMD_shader_explicit_vertex_parameter) + { + emit_spv_amd_shader_explicit_vertex_parameter_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (ext == SPIRExtension::SPV_AMD_shader_trinary_minmax) + { + emit_spv_amd_shader_trinary_minmax_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (ext == SPIRExtension::SPV_AMD_gcn_shader) + { + emit_spv_amd_gcn_shader_op(ops[0], ops[1], ops[3], &ops[4], length - 4); + } + else if (ext == SPIRExtension::SPV_debug_info || + ext == SPIRExtension::NonSemanticShaderDebugInfo || + ext == SPIRExtension::NonSemanticGeneric) + { + break; // Ignore SPIR-V debug information extended instructions. + } + else if (ext == SPIRExtension::NonSemanticDebugPrintf) + { + // Operation 1 is printf. + if (ops[3] == 1) + { + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("Debug printf is only supported in Vulkan GLSL.\n"); + require_extension_internal("GL_EXT_debug_printf"); + auto &format_string = get<SPIRString>(ops[4]).str; + string expr = join("debugPrintfEXT(\"", format_string, "\""); + for (uint32_t i = 5; i < length; i++) + { + expr += ", "; + expr += to_expression(ops[i]); + } + statement(expr, ");"); + } + } + else + { + statement("// unimplemented ext op ", instruction.op); + break; + } + + break; + } + + // Legacy sub-group stuff ... + case OpSubgroupBallotKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + string expr; + expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)"); + emit_op(result_type, id, expr, should_forward(ops[2])); + + require_extension_internal("GL_ARB_shader_ballot"); + inherit_expression_dependencies(id, ops[2]); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupFirstInvocationKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB"); + + require_extension_internal("GL_ARB_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupReadInvocationKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB"); + + require_extension_internal("GL_ARB_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupAllKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupAnyKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpSubgroupAllEqualKHR: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB"); + + require_extension_internal("GL_ARB_shader_group_vote"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpGroupIAddNonUniformAMD: + case OpGroupFAddNonUniformAMD: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpGroupFMinNonUniformAMD: + case OpGroupUMinNonUniformAMD: + case OpGroupSMinNonUniformAMD: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpGroupFMaxNonUniformAMD: + case OpGroupUMaxNonUniformAMD: + case OpGroupSMaxNonUniformAMD: + { + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD"); + + require_extension_internal("GL_AMD_shader_ballot"); + register_control_dependent_expression(ops[1]); + break; + } + + case OpFragmentMaskFetchAMD: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.image.dim == spv::DimSubpassData) + { + emit_unary_func_op(result_type, id, ops[2], "fragmentMaskFetchAMD"); + } + else + { + emit_binary_func_op(result_type, id, ops[2], ops[3], "fragmentMaskFetchAMD"); + } + + require_extension_internal("GL_AMD_shader_fragment_mask"); + break; + } + + case OpFragmentFetchAMD: + { + auto &type = expression_type(ops[2]); + uint32_t result_type = ops[0]; + uint32_t id = ops[1]; + + if (type.image.dim == spv::DimSubpassData) + { + emit_binary_func_op(result_type, id, ops[2], ops[4], "fragmentFetchAMD"); + } + else + { + emit_trinary_func_op(result_type, id, ops[2], ops[3], ops[4], "fragmentFetchAMD"); + } + + require_extension_internal("GL_AMD_shader_fragment_mask"); + break; + } + + // Vulkan 1.1 sub-group stuff ... + case OpGroupNonUniformElect: + case OpGroupNonUniformBroadcast: + case OpGroupNonUniformBroadcastFirst: + case OpGroupNonUniformBallot: + case OpGroupNonUniformInverseBallot: + case OpGroupNonUniformBallotBitExtract: + case OpGroupNonUniformBallotBitCount: + case OpGroupNonUniformBallotFindLSB: + case OpGroupNonUniformBallotFindMSB: + case OpGroupNonUniformShuffle: + case OpGroupNonUniformShuffleXor: + case OpGroupNonUniformShuffleUp: + case OpGroupNonUniformShuffleDown: + case OpGroupNonUniformAll: + case OpGroupNonUniformAny: + case OpGroupNonUniformAllEqual: + case OpGroupNonUniformFAdd: + case OpGroupNonUniformIAdd: + case OpGroupNonUniformFMul: + case OpGroupNonUniformIMul: + case OpGroupNonUniformFMin: + case OpGroupNonUniformFMax: + case OpGroupNonUniformSMin: + case OpGroupNonUniformSMax: + case OpGroupNonUniformUMin: + case OpGroupNonUniformUMax: + case OpGroupNonUniformBitwiseAnd: + case OpGroupNonUniformBitwiseOr: + case OpGroupNonUniformBitwiseXor: + case OpGroupNonUniformLogicalAnd: + case OpGroupNonUniformLogicalOr: + case OpGroupNonUniformLogicalXor: + case OpGroupNonUniformQuadSwap: + case OpGroupNonUniformQuadBroadcast: + emit_subgroup_op(instruction); + break; + + case OpFUnordEqual: + case OpFUnordLessThan: + case OpFUnordGreaterThan: + case OpFUnordLessThanEqual: + case OpFUnordGreaterThanEqual: + { + // GLSL doesn't specify if floating point comparisons are ordered or unordered, + // but glslang always emits ordered floating point compares for GLSL. + // To get unordered compares, we can test the opposite thing and invert the result. + // This way, we force true when there is any NaN present. + uint32_t op0 = ops[2]; + uint32_t op1 = ops[3]; + + string expr; + if (expression_type(op0).vecsize > 1) + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = "notEqual"; + break; + + case OpFUnordLessThan: + comp_op = "greaterThanEqual"; + break; + + case OpFUnordLessThanEqual: + comp_op = "greaterThan"; + break; + + case OpFUnordGreaterThan: + comp_op = "lessThanEqual"; + break; + + case OpFUnordGreaterThanEqual: + comp_op = "lessThan"; + break; + + default: + assert(0); + break; + } + + expr = join("not(", comp_op, "(", to_unpacked_expression(op0), ", ", to_unpacked_expression(op1), "))"); + } + else + { + const char *comp_op = nullptr; + switch (opcode) + { + case OpFUnordEqual: + comp_op = " != "; + break; + + case OpFUnordLessThan: + comp_op = " >= "; + break; + + case OpFUnordLessThanEqual: + comp_op = " > "; + break; + + case OpFUnordGreaterThan: + comp_op = " <= "; + break; + + case OpFUnordGreaterThanEqual: + comp_op = " < "; + break; + + default: + assert(0); + break; + } + + expr = join("!(", to_enclosed_unpacked_expression(op0), comp_op, to_enclosed_unpacked_expression(op1), ")"); + } + + emit_op(ops[0], ops[1], expr, should_forward(op0) && should_forward(op1)); + inherit_expression_dependencies(ops[1], op0); + inherit_expression_dependencies(ops[1], op1); + break; + } + + case OpReportIntersectionKHR: + // NV is same opcode. + forced_temporaries.insert(ops[1]); + if (ray_tracing_is_khr) + GLSL_BFOP(reportIntersectionEXT); + else + GLSL_BFOP(reportIntersectionNV); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpIgnoreIntersectionNV: + // KHR variant is a terminator. + statement("ignoreIntersectionNV();"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpTerminateRayNV: + // KHR variant is a terminator. + statement("terminateRayNV();"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpTraceNV: + statement("traceNV(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", + to_expression(ops[9]), ", ", to_expression(ops[10]), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpTraceRayKHR: + if (!has_decoration(ops[10], DecorationLocation)) + SPIRV_CROSS_THROW("A memory declaration object must be used in TraceRayKHR."); + statement("traceRayEXT(", to_non_uniform_aware_expression(ops[0]), ", ", to_expression(ops[1]), ", ", to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), ", ", to_expression(ops[8]), ", ", + to_expression(ops[9]), ", ", get_decoration(ops[10], DecorationLocation), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpExecuteCallableNV: + statement("executeCallableNV(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + case OpExecuteCallableKHR: + if (!has_decoration(ops[1], DecorationLocation)) + SPIRV_CROSS_THROW("A memory declaration object must be used in ExecuteCallableKHR."); + statement("executeCallableEXT(", to_expression(ops[0]), ", ", get_decoration(ops[1], DecorationLocation), ");"); + flush_control_dependent_expressions(current_emitting_block->self); + break; + + // Don't bother forwarding temporaries. Avoids having to test expression invalidation with ray query objects. + case OpRayQueryInitializeKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryInitializeEXT(", + to_expression(ops[0]), ", ", to_expression(ops[1]), ", ", + to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", + to_expression(ops[4]), ", ", to_expression(ops[5]), ", ", + to_expression(ops[6]), ", ", to_expression(ops[7]), ");"); + break; + case OpRayQueryProceedKHR: + flush_variable_declaration(ops[0]); + emit_op(ops[0], ops[1], join("rayQueryProceedEXT(", to_expression(ops[2]), ")"), false); + break; + case OpRayQueryTerminateKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryTerminateEXT(", to_expression(ops[0]), ");"); + break; + case OpRayQueryGenerateIntersectionKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryGenerateIntersectionEXT(", to_expression(ops[0]), ", ", to_expression(ops[1]), ");"); + break; + case OpRayQueryConfirmIntersectionKHR: + flush_variable_declaration(ops[0]); + statement("rayQueryConfirmIntersectionEXT(", to_expression(ops[0]), ");"); + break; +#define GLSL_RAY_QUERY_GET_OP(op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ + emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ")"), false); \ + break +#define GLSL_RAY_QUERY_GET_OP2(op) \ + case OpRayQueryGet##op##KHR: \ + flush_variable_declaration(ops[2]); \ + emit_op(ops[0], ops[1], join("rayQueryGet" #op "EXT(", to_expression(ops[2]), ", ", "bool(", to_expression(ops[3]), "))"), false); \ + break + GLSL_RAY_QUERY_GET_OP(RayTMin); + GLSL_RAY_QUERY_GET_OP(RayFlags); + GLSL_RAY_QUERY_GET_OP(WorldRayOrigin); + GLSL_RAY_QUERY_GET_OP(WorldRayDirection); + GLSL_RAY_QUERY_GET_OP(IntersectionCandidateAABBOpaque); + GLSL_RAY_QUERY_GET_OP2(IntersectionType); + GLSL_RAY_QUERY_GET_OP2(IntersectionT); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceCustomIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceId); + GLSL_RAY_QUERY_GET_OP2(IntersectionInstanceShaderBindingTableRecordOffset); + GLSL_RAY_QUERY_GET_OP2(IntersectionGeometryIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionPrimitiveIndex); + GLSL_RAY_QUERY_GET_OP2(IntersectionBarycentrics); + GLSL_RAY_QUERY_GET_OP2(IntersectionFrontFace); + GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayDirection); + GLSL_RAY_QUERY_GET_OP2(IntersectionObjectRayOrigin); + GLSL_RAY_QUERY_GET_OP2(IntersectionObjectToWorld); + GLSL_RAY_QUERY_GET_OP2(IntersectionWorldToObject); +#undef GLSL_RAY_QUERY_GET_OP +#undef GLSL_RAY_QUERY_GET_OP2 + + case OpConvertUToAccelerationStructureKHR: + { + require_extension_internal("GL_EXT_ray_tracing"); + + bool elide_temporary = should_forward(ops[2]) && forced_temporaries.count(ops[1]) == 0 && + !hoisted_temporaries.count(ops[1]); + + if (elide_temporary) + { + GLSL_UFOP(accelerationStructureEXT); + } + else + { + // Force this path in subsequent iterations. + forced_temporaries.insert(ops[1]); + + // We cannot declare a temporary acceleration structure in GLSL. + // If we get to this point, we'll have to emit a temporary uvec2, + // and cast to RTAS on demand. + statement(declare_temporary(expression_type_id(ops[2]), ops[1]), to_unpacked_expression(ops[2]), ";"); + // Use raw SPIRExpression interface to block all usage tracking. + set<SPIRExpression>(ops[1], join("accelerationStructureEXT(", to_name(ops[1]), ")"), ops[0], true); + } + break; + } + + case OpConvertUToPtr: + { + auto &type = get<SPIRType>(ops[0]); + if (type.storage != StorageClassPhysicalStorageBufferEXT) + SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertUToPtr."); + + auto &in_type = expression_type(ops[2]); + if (in_type.vecsize == 2) + require_extension_internal("GL_EXT_buffer_reference_uvec2"); + + auto op = type_to_glsl(type); + emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); + break; + } + + case OpConvertPtrToU: + { + auto &type = get<SPIRType>(ops[0]); + auto &ptr_type = expression_type(ops[2]); + if (ptr_type.storage != StorageClassPhysicalStorageBufferEXT) + SPIRV_CROSS_THROW("Only StorageClassPhysicalStorageBufferEXT is supported by OpConvertPtrToU."); + + if (type.vecsize == 2) + require_extension_internal("GL_EXT_buffer_reference_uvec2"); + + auto op = type_to_glsl(type); + emit_unary_func_op(ops[0], ops[1], ops[2], op.c_str()); + break; + } + + case OpUndef: + // Undefined value has been declared. + break; + + case OpLine: + { + emit_line_directive(ops[0], ops[1]); + break; + } + + case OpNoLine: + break; + + case OpDemoteToHelperInvocationEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_demote_to_helper_invocation"); + statement(backend.demote_literal, ";"); + break; + + case OpIsHelperInvocationEXT: + if (!options.vulkan_semantics) + SPIRV_CROSS_THROW("GL_EXT_demote_to_helper_invocation is only supported in Vulkan GLSL."); + require_extension_internal("GL_EXT_demote_to_helper_invocation"); + // Helper lane state with demote is volatile by nature. + // Do not forward this. + emit_op(ops[0], ops[1], "helperInvocationEXT()", false); + break; + + case OpBeginInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) + { + statement("SPIRV_Cross_beginInvocationInterlock();"); + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; + + case OpEndInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) + { + statement("SPIRV_Cross_endInvocationInterlock();"); + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; + + case OpSetMeshOutputsEXT: + statement("SetMeshOutputsEXT(", to_unpacked_expression(ops[0]), ", ", to_unpacked_expression(ops[1]), ");"); + break; + + case OpReadClockKHR: + { + auto &type = get<SPIRType>(ops[0]); + auto scope = static_cast<Scope>(evaluate_constant_u32(ops[2])); + const char *op = nullptr; + // Forwarding clock statements leads to a scenario where an SSA value can take on different + // values every time it's evaluated. Block any forwarding attempt. + // We also might want to invalidate all expressions to function as a sort of optimization + // barrier, but might be overkill for now. + if (scope == ScopeDevice) + { + require_extension_internal("GL_EXT_shader_realtime_clock"); + if (type.basetype == SPIRType::BaseType::UInt64) + op = "clockRealtimeEXT()"; + else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2) + op = "clockRealtime2x32EXT()"; + else + SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode."); + } + else if (scope == ScopeSubgroup) + { + require_extension_internal("GL_ARB_shader_clock"); + if (type.basetype == SPIRType::BaseType::UInt64) + op = "clockARB()"; + else if (type.basetype == SPIRType::BaseType::UInt && type.vecsize == 2) + op = "clock2x32ARB()"; + else + SPIRV_CROSS_THROW("Unsupported result type for OpReadClockKHR opcode."); + } + else + SPIRV_CROSS_THROW("Unsupported scope for OpReadClockKHR opcode."); + + emit_op(ops[0], ops[1], op, false); + break; + } + + default: + statement("// unimplemented op ", instruction.op); + break; + } +} + +// Appends function arguments, mapped from global variables, beyond the specified arg index. +// This is used when a function call uses fewer arguments than the function defines. +// This situation may occur if the function signature has been dynamically modified to +// extract global variables referenced from within the function, and convert them to +// function arguments. This is necessary for shader languages that do not support global +// access to shader input content from within a function (eg. Metal). Each additional +// function args uses the name of the global variable. Function nesting will modify the +// functions and function calls all the way up the nesting chain. +void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<string> &arglist) +{ + auto &args = func.arguments; + uint32_t arg_cnt = uint32_t(args.size()); + for (uint32_t arg_idx = index; arg_idx < arg_cnt; arg_idx++) + { + auto &arg = args[arg_idx]; + assert(arg.alias_global_variable); + + // If the underlying variable needs to be declared + // (ie. a local variable with deferred declaration), do so now. + uint32_t var_id = get<SPIRVariable>(arg.id).basevariable; + if (var_id) + flush_variable_declaration(var_id); + + arglist.push_back(to_func_call_arg(arg, arg.id)); + } +} + +string CompilerGLSL::to_member_name(const SPIRType &type, uint32_t index) +{ + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + { + return to_member_name(get<SPIRType>(type.type_alias), index); + } + + auto &memb = ir.meta[type.self].members; + if (index < memb.size() && !memb[index].alias.empty()) + return memb[index].alias; + else + return join("_m", index); +} + +string CompilerGLSL::to_member_reference(uint32_t, const SPIRType &type, uint32_t index, bool) +{ + return join(".", to_member_name(type, index)); +} + +string CompilerGLSL::to_multi_member_reference(const SPIRType &type, const SmallVector<uint32_t> &indices) +{ + string ret; + auto *member_type = &type; + for (auto &index : indices) + { + ret += join(".", to_member_name(*member_type, index)); + member_type = &get<SPIRType>(member_type->member_types[index]); + } + return ret; +} + +void CompilerGLSL::add_member_name(SPIRType &type, uint32_t index) +{ + auto &memb = ir.meta[type.self].members; + if (index < memb.size() && !memb[index].alias.empty()) + { + auto &name = memb[index].alias; + if (name.empty()) + return; + + ParsedIR::sanitize_identifier(name, true, true); + update_name_cache(type.member_name_cache, name); + } +} + +// Checks whether the ID is a row_major matrix that requires conversion before use +bool CompilerGLSL::is_non_native_row_major_matrix(uint32_t id) +{ + // Natively supported row-major matrices do not need to be converted. + // Legacy targets do not support row major. + if (backend.native_row_major_matrix && !is_legacy()) + return false; + + auto *e = maybe_get<SPIRExpression>(id); + if (e) + return e->need_transpose; + else + return has_decoration(id, DecorationRowMajor); +} + +// Checks whether the member is a row_major matrix that requires conversion before use +bool CompilerGLSL::member_is_non_native_row_major_matrix(const SPIRType &type, uint32_t index) +{ + // Natively supported row-major matrices do not need to be converted. + if (backend.native_row_major_matrix && !is_legacy()) + return false; + + // Non-matrix or column-major matrix types do not need to be converted. + if (!has_member_decoration(type.self, index, DecorationRowMajor)) + return false; + + // Only square row-major matrices can be converted at this time. + // Converting non-square matrices will require defining custom GLSL function that + // swaps matrix elements while retaining the original dimensional form of the matrix. + const auto mbr_type = get<SPIRType>(type.member_types[index]); + if (mbr_type.columns != mbr_type.vecsize) + SPIRV_CROSS_THROW("Row-major matrices must be square on this platform."); + + return true; +} + +// Checks if we need to remap physical type IDs when declaring the type in a buffer. +bool CompilerGLSL::member_is_remapped_physical_type(const SPIRType &type, uint32_t index) const +{ + return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypeID); +} + +// Checks whether the member is in packed data type, that might need to be unpacked. +bool CompilerGLSL::member_is_packed_physical_type(const SPIRType &type, uint32_t index) const +{ + return has_extended_member_decoration(type.self, index, SPIRVCrossDecorationPhysicalTypePacked); +} + +// Wraps the expression string in a function call that converts the +// row_major matrix result of the expression to a column_major matrix. +// Base implementation uses the standard library transpose() function. +// Subclasses may override to use a different function. +string CompilerGLSL::convert_row_major_matrix(string exp_str, const SPIRType &exp_type, uint32_t /* physical_type_id */, + bool /*is_packed*/, bool relaxed) +{ + strip_enclosed_expression(exp_str); + if (!is_matrix(exp_type)) + { + auto column_index = exp_str.find_last_of('['); + if (column_index == string::npos) + return exp_str; + + auto column_expr = exp_str.substr(column_index); + exp_str.resize(column_index); + + auto end_deferred_index = column_expr.find_last_of(']'); + if (end_deferred_index != string::npos && end_deferred_index + 1 != column_expr.size()) + { + // If we have any data member fixups, it must be transposed so that it refers to this index. + // E.g. [0].data followed by [1] would be shuffled to [1][0].data which is wrong, + // and needs to be [1].data[0] instead. + end_deferred_index++; + column_expr = column_expr.substr(end_deferred_index) + + column_expr.substr(0, end_deferred_index); + } + + auto transposed_expr = type_to_glsl_constructor(exp_type) + "("; + + // Loading a column from a row-major matrix. Unroll the load. + for (uint32_t c = 0; c < exp_type.vecsize; c++) + { + transposed_expr += join(exp_str, '[', c, ']', column_expr); + if (c + 1 < exp_type.vecsize) + transposed_expr += ", "; + } + + transposed_expr += ")"; + return transposed_expr; + } + else if (options.version < 120) + { + // GLSL 110, ES 100 do not have transpose(), so emulate it. Note that + // these GLSL versions do not support non-square matrices. + if (exp_type.vecsize == 2 && exp_type.columns == 2) + require_polyfill(PolyfillTranspose2x2, relaxed); + else if (exp_type.vecsize == 3 && exp_type.columns == 3) + require_polyfill(PolyfillTranspose3x3, relaxed); + else if (exp_type.vecsize == 4 && exp_type.columns == 4) + require_polyfill(PolyfillTranspose4x4, relaxed); + else + SPIRV_CROSS_THROW("Non-square matrices are not supported in legacy GLSL, cannot transpose."); + return join("spvTranspose", (options.es && relaxed) ? "MP" : "", "(", exp_str, ")"); + } + else + return join("transpose(", exp_str, ")"); +} + +string CompilerGLSL::variable_decl(const SPIRType &type, const string &name, uint32_t id) +{ + string type_name = type_to_glsl(type, id); + remap_variable_type_name(type, name, type_name); + return join(type_name, " ", name, type_to_array_glsl(type, id)); +} + +bool CompilerGLSL::variable_decl_is_remapped_storage(const SPIRVariable &var, StorageClass storage) const +{ + return var.storage == storage; +} + +// Emit a structure member. Subclasses may override to modify output, +// or to dynamically add a padding member if needed. +void CompilerGLSL::emit_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, + const string &qualifier, uint32_t) +{ + auto &membertype = get<SPIRType>(member_type_id); + + Bitset memberflags; + auto &memb = ir.meta[type.self].members; + if (index < memb.size()) + memberflags = memb[index].decoration_flags; + + string qualifiers; + bool is_block = ir.meta[type.self].decoration.decoration_flags.get(DecorationBlock) || + ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock); + + if (is_block) + qualifiers = to_interpolation_qualifiers(memberflags); + + statement(layout_for_member(type, index), qualifiers, qualifier, flags_to_qualifiers_glsl(membertype, memberflags), + variable_decl(membertype, to_member_name(type, index)), ";"); +} + +void CompilerGLSL::emit_struct_padding_target(const SPIRType &) +{ +} + +string CompilerGLSL::flags_to_qualifiers_glsl(const SPIRType &type, const Bitset &flags) +{ + // GL_EXT_buffer_reference variables can be marked as restrict. + if (flags.get(DecorationRestrictPointerEXT)) + return "restrict "; + + string qual; + + if (type_is_floating_point(type) && flags.get(DecorationNoContraction) && backend.support_precise_qualifier) + qual = "precise "; + + // Structs do not have precision qualifiers, neither do doubles (desktop only anyways, so no mediump/highp). + bool type_supports_precision = + type.basetype == SPIRType::Float || type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt || + type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage || + type.basetype == SPIRType::Sampler; + + if (!type_supports_precision) + return qual; + + if (options.es) + { + auto &execution = get_entry_point(); + + if (type.basetype == SPIRType::UInt && is_legacy_es()) + { + // HACK: This is a bool. See comment in type_to_glsl(). + qual += "lowp "; + } + else if (flags.get(DecorationRelaxedPrecision)) + { + bool implied_fmediump = type.basetype == SPIRType::Float && + options.fragment.default_float_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + bool implied_imediump = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + options.fragment.default_int_precision == Options::Mediump && + execution.model == ExecutionModelFragment; + + qual += (implied_fmediump || implied_imediump) ? "" : "mediump "; + } + else + { + bool implied_fhighp = + type.basetype == SPIRType::Float && ((options.fragment.default_float_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); + + bool implied_ihighp = (type.basetype == SPIRType::Int || type.basetype == SPIRType::UInt) && + ((options.fragment.default_int_precision == Options::Highp && + execution.model == ExecutionModelFragment) || + (execution.model != ExecutionModelFragment)); + + qual += (implied_fhighp || implied_ihighp) ? "" : "highp "; + } + } + else if (backend.allow_precision_qualifiers) + { + // Vulkan GLSL supports precision qualifiers, even in desktop profiles, which is convenient. + // The default is highp however, so only emit mediump in the rare case that a shader has these. + if (flags.get(DecorationRelaxedPrecision)) + qual += "mediump "; + } + + return qual; +} + +string CompilerGLSL::to_precision_qualifiers_glsl(uint32_t id) +{ + auto &type = expression_type(id); + bool use_precision_qualifiers = backend.allow_precision_qualifiers; + if (use_precision_qualifiers && (type.basetype == SPIRType::Image || type.basetype == SPIRType::SampledImage)) + { + // Force mediump for the sampler type. We cannot declare 16-bit or smaller image types. + auto &result_type = get<SPIRType>(type.image.type); + if (result_type.width < 32) + return "mediump "; + } + return flags_to_qualifiers_glsl(type, ir.meta[id].decoration.decoration_flags); +} + +void CompilerGLSL::fixup_io_block_patch_primitive_qualifiers(const SPIRVariable &var) +{ + // Works around weird behavior in glslangValidator where + // a patch out block is translated to just block members getting the decoration. + // To make glslang not complain when we compile again, we have to transform this back to a case where + // the variable itself has Patch decoration, and not members. + // Same for perprimitiveEXT. + auto &type = get<SPIRType>(var.basetype); + if (has_decoration(type.self, DecorationBlock)) + { + uint32_t member_count = uint32_t(type.member_types.size()); + Decoration promoted_decoration = {}; + bool do_promote_decoration = false; + for (uint32_t i = 0; i < member_count; i++) + { + if (has_member_decoration(type.self, i, DecorationPatch)) + { + promoted_decoration = DecorationPatch; + do_promote_decoration = true; + break; + } + else if (has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) + { + promoted_decoration = DecorationPerPrimitiveEXT; + do_promote_decoration = true; + break; + } + } + + if (do_promote_decoration) + { + set_decoration(var.self, promoted_decoration); + for (uint32_t i = 0; i < member_count; i++) + unset_member_decoration(type.self, i, promoted_decoration); + } + } +} + +string CompilerGLSL::to_qualifiers_glsl(uint32_t id) +{ + auto &flags = get_decoration_bitset(id); + string res; + + auto *var = maybe_get<SPIRVariable>(id); + + if (var && var->storage == StorageClassWorkgroup && !backend.shared_is_implied) + res += "shared "; + else if (var && var->storage == StorageClassTaskPayloadWorkgroupEXT && !backend.shared_is_implied) + res += "taskPayloadSharedEXT "; + + res += to_interpolation_qualifiers(flags); + if (var) + res += to_storage_qualifiers_glsl(*var); + + auto &type = expression_type(id); + if (type.image.dim != DimSubpassData && type.image.sampled == 2) + { + if (flags.get(DecorationCoherent)) + res += "coherent "; + if (flags.get(DecorationRestrict)) + res += "restrict "; + + if (flags.get(DecorationNonWritable)) + res += "readonly "; + + bool formatted_load = type.image.format == ImageFormatUnknown; + if (flags.get(DecorationNonReadable)) + { + res += "writeonly "; + formatted_load = false; + } + + if (formatted_load) + { + if (!options.es) + require_extension_internal("GL_EXT_shader_image_load_formatted"); + else + SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_image_load_formatted in ESSL."); + } + } + + res += to_precision_qualifiers_glsl(id); + + return res; +} + +string CompilerGLSL::argument_decl(const SPIRFunction::Parameter &arg) +{ + // glslangValidator seems to make all arguments pointer no matter what which is rather bizarre ... + auto &type = expression_type(arg.id); + const char *direction = ""; + + if (type.pointer) + { + if (arg.write_count && arg.read_count) + direction = "inout "; + else if (arg.write_count) + direction = "out "; + } + + return join(direction, to_qualifiers_glsl(arg.id), variable_decl(type, to_name(arg.id), arg.id)); +} + +string CompilerGLSL::to_initializer_expression(const SPIRVariable &var) +{ + return to_unpacked_expression(var.initializer); +} + +string CompilerGLSL::to_zero_initialized_expression(uint32_t type_id) +{ +#ifndef NDEBUG + auto &type = get<SPIRType>(type_id); + assert(type.storage == StorageClassPrivate || type.storage == StorageClassFunction || + type.storage == StorageClassGeneric); +#endif + uint32_t id = ir.increase_bound_by(1); + ir.make_constant_null(id, type_id, false); + return constant_expression(get<SPIRConstant>(id)); +} + +bool CompilerGLSL::type_can_zero_initialize(const SPIRType &type) const +{ + if (type.pointer) + return false; + + if (!type.array.empty() && options.flatten_multidimensional_arrays) + return false; + + for (auto &literal : type.array_size_literal) + if (!literal) + return false; + + for (auto &memb : type.member_types) + if (!type_can_zero_initialize(get<SPIRType>(memb))) + return false; + + return true; +} + +string CompilerGLSL::variable_decl(const SPIRVariable &variable) +{ + // Ignore the pointer type since GLSL doesn't have pointers. + auto &type = get_variable_data_type(variable); + + if (type.pointer_depth > 1 && !backend.support_pointer_to_pointer) + SPIRV_CROSS_THROW("Cannot declare pointer-to-pointer types."); + + auto res = join(to_qualifiers_glsl(variable.self), variable_decl(type, to_name(variable.self), variable.self)); + + if (variable.loop_variable && variable.static_expression) + { + uint32_t expr = variable.static_expression; + if (ir.ids[expr].get_type() != TypeUndef) + res += join(" = ", to_unpacked_expression(variable.static_expression)); + else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); + } + else if (variable.initializer && !variable_decl_is_remapped_storage(variable, StorageClassWorkgroup)) + { + uint32_t expr = variable.initializer; + if (ir.ids[expr].get_type() != TypeUndef) + res += join(" = ", to_initializer_expression(variable)); + else if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + res += join(" = ", to_zero_initialized_expression(get_variable_data_type_id(variable))); + } + + return res; +} + +const char *CompilerGLSL::to_pls_qualifiers_glsl(const SPIRVariable &variable) +{ + auto &flags = get_decoration_bitset(variable.self); + if (flags.get(DecorationRelaxedPrecision)) + return "mediump "; + else + return "highp "; +} + +string CompilerGLSL::pls_decl(const PlsRemap &var) +{ + auto &variable = get<SPIRVariable>(var.id); + + auto op_and_basetype = pls_format_to_basetype(var.format); + + SPIRType type { op_and_basetype.first }; + type.basetype = op_and_basetype.second; + auto vecsize = pls_format_to_components(var.format); + if (vecsize > 1) + { + type.op = OpTypeVector; + type.vecsize = vecsize; + } + + return join(to_pls_layout(var.format), to_pls_qualifiers_glsl(variable), type_to_glsl(type), " ", + to_name(variable.self)); +} + +uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type) const +{ + return to_array_size_literal(type, uint32_t(type.array.size() - 1)); +} + +uint32_t CompilerGLSL::to_array_size_literal(const SPIRType &type, uint32_t index) const +{ + assert(type.array.size() == type.array_size_literal.size()); + + if (type.array_size_literal[index]) + { + return type.array[index]; + } + else + { + // Use the default spec constant value. + // This is the best we can do. + return evaluate_constant_u32(type.array[index]); + } +} + +string CompilerGLSL::to_array_size(const SPIRType &type, uint32_t index) +{ + assert(type.array.size() == type.array_size_literal.size()); + + auto &size = type.array[index]; + if (!type.array_size_literal[index]) + return to_expression(size); + else if (size) + return convert_to_string(size); + else if (!backend.unsized_array_supported) + { + // For runtime-sized arrays, we can work around + // lack of standard support for this by simply having + // a single element array. + // + // Runtime length arrays must always be the last element + // in an interface block. + return "1"; + } + else + return ""; +} + +string CompilerGLSL::type_to_array_glsl(const SPIRType &type, uint32_t) +{ + if (type.pointer && type.storage == StorageClassPhysicalStorageBufferEXT && type.basetype != SPIRType::Struct) + { + // We are using a wrapped pointer type, and we should not emit any array declarations here. + return ""; + } + + if (type.array.empty()) + return ""; + + if (options.flatten_multidimensional_arrays) + { + string res; + res += "["; + for (auto i = uint32_t(type.array.size()); i; i--) + { + res += enclose_expression(to_array_size(type, i - 1)); + if (i > 1) + res += " * "; + } + res += "]"; + return res; + } + else + { + if (type.array.size() > 1) + { + if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_arrays_of_arrays"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310. " + "Try using --flatten-multidimensional-arrays or set " + "options.flatten_multidimensional_arrays to true."); + } + + string res; + for (auto i = uint32_t(type.array.size()); i; i--) + { + res += "["; + res += to_array_size(type, i - 1); + res += "]"; + } + return res; + } +} + +string CompilerGLSL::image_type_glsl(const SPIRType &type, uint32_t id, bool /*member*/) +{ + auto &imagetype = get<SPIRType>(type.image.type); + string res; + + switch (imagetype.basetype) + { + case SPIRType::Int64: + res = "i64"; + require_extension_internal("GL_EXT_shader_image_int64"); + break; + case SPIRType::UInt64: + res = "u64"; + require_extension_internal("GL_EXT_shader_image_int64"); + break; + case SPIRType::Int: + case SPIRType::Short: + case SPIRType::SByte: + res = "i"; + break; + case SPIRType::UInt: + case SPIRType::UShort: + case SPIRType::UByte: + res = "u"; + break; + default: + break; + } + + // For half image types, we will force mediump for the sampler, and cast to f16 after any sampling operation. + // We cannot express a true half texture type in GLSL. Neither for short integer formats for that matter. + + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && options.vulkan_semantics) + return res + "subpassInput" + (type.image.ms ? "MS" : ""); + else if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData && + subpass_input_is_framebuffer_fetch(id)) + { + SPIRType sampled_type = get<SPIRType>(type.image.type); + sampled_type.vecsize = 4; + return type_to_glsl(sampled_type); + } + + // If we're emulating subpassInput with samplers, force sampler2D + // so we don't have to specify format. + if (type.basetype == SPIRType::Image && type.image.dim != DimSubpassData) + { + // Sampler buffers are always declared as samplerBuffer even though they might be separate images in the SPIR-V. + if (type.image.dim == DimBuffer && type.image.sampled == 1) + res += "sampler"; + else + res += type.image.sampled == 2 ? "image" : "texture"; + } + else + res += "sampler"; + + switch (type.image.dim) + { + case Dim1D: + // ES doesn't support 1D. Fake it with 2D. + res += options.es ? "2D" : "1D"; + break; + case Dim2D: + res += "2D"; + break; + case Dim3D: + res += "3D"; + break; + case DimCube: + res += "Cube"; + break; + case DimRect: + if (options.es) + SPIRV_CROSS_THROW("Rectangle textures are not supported on OpenGL ES."); + + if (is_legacy_desktop()) + require_extension_internal("GL_ARB_texture_rectangle"); + + res += "2DRect"; + break; + + case DimBuffer: + if (options.es && options.version < 320) + require_extension_internal("GL_EXT_texture_buffer"); + else if (!options.es && options.version < 300) + require_extension_internal("GL_EXT_texture_buffer_object"); + res += "Buffer"; + break; + + case DimSubpassData: + res += "2D"; + break; + default: + SPIRV_CROSS_THROW("Only 1D, 2D, 2DRect, 3D, Buffer, InputTarget and Cube textures supported."); + } + + if (type.image.ms) + res += "MS"; + if (type.image.arrayed) + { + if (is_legacy_desktop()) + require_extension_internal("GL_EXT_texture_array"); + res += "Array"; + } + + // "Shadow" state in GLSL only exists for samplers and combined image samplers. + if (((type.basetype == SPIRType::SampledImage) || (type.basetype == SPIRType::Sampler)) && + is_depth_image(type, id)) + { + res += "Shadow"; + + if (type.image.dim == DimCube && is_legacy()) + { + if (!options.es) + require_extension_internal("GL_EXT_gpu_shader4"); + else + { + require_extension_internal("GL_NV_shadow_samplers_cube"); + res += "NV"; + } + } + } + + return res; +} + +string CompilerGLSL::type_to_glsl_constructor(const SPIRType &type) +{ + if (backend.use_array_constructor && type.array.size() > 1) + { + if (options.flatten_multidimensional_arrays) + SPIRV_CROSS_THROW("Cannot flatten constructors of multidimensional array constructors, " + "e.g. float[][]()."); + else if (!options.es && options.version < 430) + require_extension_internal("GL_ARB_arrays_of_arrays"); + else if (options.es && options.version < 310) + SPIRV_CROSS_THROW("Arrays of arrays not supported before ESSL version 310."); + } + + auto e = type_to_glsl(type); + if (backend.use_array_constructor) + { + for (uint32_t i = 0; i < type.array.size(); i++) + e += "[]"; + } + return e; +} + +// The optional id parameter indicates the object whose type we are trying +// to find the description for. It is optional. Most type descriptions do not +// depend on a specific object's use of that type. +string CompilerGLSL::type_to_glsl(const SPIRType &type, uint32_t id) +{ + if (is_physical_pointer(type) && !is_physical_pointer_to_buffer_block(type)) + { + // Need to create a magic type name which compacts the entire type information. + auto *parent = &get_pointee_type(type); + string name = type_to_glsl(*parent); + + uint32_t array_stride = get_decoration(type.parent_type, DecorationArrayStride); + + // Resolve all array dimensions in one go since once we lose the pointer type, + // array information is left to to_array_type_glsl. The base type loses array information. + while (is_array(*parent)) + { + if (parent->array_size_literal.back()) + name += join(type.array.back(), "_"); + else + name += join("id", type.array.back(), "_"); + + name += "stride_" + std::to_string(array_stride); + + array_stride = get_decoration(parent->parent_type, DecorationArrayStride); + parent = &get<SPIRType>(parent->parent_type); + } + + name += "Pointer"; + return name; + } + + switch (type.basetype) + { + case SPIRType::Struct: + // Need OpName lookup here to get a "sensible" name for a struct. + if (backend.explicit_struct_type) + return join("struct ", to_name(type.self)); + else + return to_name(type.self); + + case SPIRType::Image: + case SPIRType::SampledImage: + return image_type_glsl(type, id); + + case SPIRType::Sampler: + // The depth field is set by calling code based on the variable ID of the sampler, effectively reintroducing + // this distinction into the type system. + return comparison_ids.count(id) ? "samplerShadow" : "sampler"; + + case SPIRType::AccelerationStructure: + return ray_tracing_is_khr ? "accelerationStructureEXT" : "accelerationStructureNV"; + + case SPIRType::RayQuery: + return "rayQueryEXT"; + + case SPIRType::Void: + return "void"; + + default: + break; + } + + if (type.basetype == SPIRType::UInt && is_legacy()) + { + if (options.es) + // HACK: spirv-cross changes bools into uints and generates code which compares them to + // zero. Input code will have already been validated as not to have contained any uints, + // so any remaining uints must in fact be bools. However, simply returning "bool" here + // will result in invalid code. Instead, return an int. + return backend.basic_int_type; + else + require_extension_internal("GL_EXT_gpu_shader4"); + } + + if (type.basetype == SPIRType::AtomicCounter) + { + if (options.es && options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for atomic counters."); + else if (!options.es && options.version < 420) + require_extension_internal("GL_ARB_shader_atomic_counters"); + } + + if (type.vecsize == 1 && type.columns == 1) // Scalar builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return "bool"; + case SPIRType::SByte: + return backend.basic_int8_type; + case SPIRType::UByte: + return backend.basic_uint8_type; + case SPIRType::Short: + return backend.basic_int16_type; + case SPIRType::UShort: + return backend.basic_uint16_type; + case SPIRType::Int: + return backend.basic_int_type; + case SPIRType::UInt: + return backend.basic_uint_type; + case SPIRType::AtomicCounter: + return "atomic_uint"; + case SPIRType::Half: + return "float16_t"; + case SPIRType::Float: + return "float"; + case SPIRType::Double: + return "double"; + case SPIRType::Int64: + return "int64_t"; + case SPIRType::UInt64: + return "uint64_t"; + default: + return "???"; + } + } + else if (type.vecsize > 1 && type.columns == 1) // Vector builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bvec", type.vecsize); + case SPIRType::SByte: + return join("i8vec", type.vecsize); + case SPIRType::UByte: + return join("u8vec", type.vecsize); + case SPIRType::Short: + return join("i16vec", type.vecsize); + case SPIRType::UShort: + return join("u16vec", type.vecsize); + case SPIRType::Int: + return join("ivec", type.vecsize); + case SPIRType::UInt: + return join("uvec", type.vecsize); + case SPIRType::Half: + return join("f16vec", type.vecsize); + case SPIRType::Float: + return join("vec", type.vecsize); + case SPIRType::Double: + return join("dvec", type.vecsize); + case SPIRType::Int64: + return join("i64vec", type.vecsize); + case SPIRType::UInt64: + return join("u64vec", type.vecsize); + default: + return "???"; + } + } + else if (type.vecsize == type.columns) // Simple Matrix builtin + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bmat", type.vecsize); + case SPIRType::Int: + return join("imat", type.vecsize); + case SPIRType::UInt: + return join("umat", type.vecsize); + case SPIRType::Half: + return join("f16mat", type.vecsize); + case SPIRType::Float: + return join("mat", type.vecsize); + case SPIRType::Double: + return join("dmat", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; + } + } + else + { + switch (type.basetype) + { + case SPIRType::Boolean: + return join("bmat", type.columns, "x", type.vecsize); + case SPIRType::Int: + return join("imat", type.columns, "x", type.vecsize); + case SPIRType::UInt: + return join("umat", type.columns, "x", type.vecsize); + case SPIRType::Half: + return join("f16mat", type.columns, "x", type.vecsize); + case SPIRType::Float: + return join("mat", type.columns, "x", type.vecsize); + case SPIRType::Double: + return join("dmat", type.columns, "x", type.vecsize); + // Matrix types not supported for int64/uint64. + default: + return "???"; + } + } +} + +void CompilerGLSL::add_variable(unordered_set<string> &variables_primary, + const unordered_set<string> &variables_secondary, string &name) +{ + if (name.empty()) + return; + + ParsedIR::sanitize_underscores(name); + if (ParsedIR::is_globally_reserved_identifier(name, true)) + { + name.clear(); + return; + } + + update_name_cache(variables_primary, variables_secondary, name); +} + +void CompilerGLSL::add_local_variable_name(uint32_t id) +{ + add_variable(local_variable_names, block_names, ir.meta[id].decoration.alias); +} + +void CompilerGLSL::add_resource_name(uint32_t id) +{ + add_variable(resource_names, block_names, ir.meta[id].decoration.alias); +} + +void CompilerGLSL::add_header_line(const std::string &line) +{ + header_lines.push_back(line); +} + +bool CompilerGLSL::has_extension(const std::string &ext) const +{ + auto itr = find(begin(forced_extensions), end(forced_extensions), ext); + return itr != end(forced_extensions); +} + +void CompilerGLSL::require_extension(const std::string &ext) +{ + if (!has_extension(ext)) + forced_extensions.push_back(ext); +} + +const SmallVector<std::string> &CompilerGLSL::get_required_extensions() const +{ + return forced_extensions; +} + +void CompilerGLSL::require_extension_internal(const string &ext) +{ + if (backend.supports_extensions && !has_extension(ext)) + { + forced_extensions.push_back(ext); + force_recompile(); + } +} + +void CompilerGLSL::flatten_buffer_block(VariableID id) +{ + auto &var = get<SPIRVariable>(id); + auto &type = get<SPIRType>(var.basetype); + auto name = to_name(type.self, false); + auto &flags = get_decoration_bitset(type.self); + + if (!type.array.empty()) + SPIRV_CROSS_THROW(name + " is an array of UBOs."); + if (type.basetype != SPIRType::Struct) + SPIRV_CROSS_THROW(name + " is not a struct."); + if (!flags.get(DecorationBlock)) + SPIRV_CROSS_THROW(name + " is not a block."); + if (type.member_types.empty()) + SPIRV_CROSS_THROW(name + " is an empty struct."); + + flattened_buffer_blocks.insert(id); +} + +bool CompilerGLSL::builtin_translates_to_nonarray(spv::BuiltIn /*builtin*/) const +{ + return false; // GLSL itself does not need to translate array builtin types to non-array builtin types +} + +bool CompilerGLSL::is_user_type_structured(uint32_t /*id*/) const +{ + return false; // GLSL itself does not have structured user type, but HLSL does with StructuredBuffer and RWStructuredBuffer resources. +} + +bool CompilerGLSL::check_atomic_image(uint32_t id) +{ + auto &type = expression_type(id); + if (type.storage == StorageClassImage) + { + if (options.es && options.version < 320) + require_extension_internal("GL_OES_shader_image_atomic"); + + auto *var = maybe_get_backing_variable(id); + if (var) + { + if (has_decoration(var->self, DecorationNonWritable) || has_decoration(var->self, DecorationNonReadable)) + { + unset_decoration(var->self, DecorationNonWritable); + unset_decoration(var->self, DecorationNonReadable); + force_recompile(); + } + } + return true; + } + else + return false; +} + +void CompilerGLSL::add_function_overload(const SPIRFunction &func) +{ + Hasher hasher; + for (auto &arg : func.arguments) + { + // Parameters can vary with pointer type or not, + // but that will not change the signature in GLSL/HLSL, + // so strip the pointer type before hashing. + uint32_t type_id = get_pointee_type_id(arg.type); + auto &type = get<SPIRType>(type_id); + + if (!combined_image_samplers.empty()) + { + // If we have combined image samplers, we cannot really trust the image and sampler arguments + // we pass down to callees, because they may be shuffled around. + // Ignore these arguments, to make sure that functions need to differ in some other way + // to be considered different overloads. + if (type.basetype == SPIRType::SampledImage || + (type.basetype == SPIRType::Image && type.image.sampled == 1) || type.basetype == SPIRType::Sampler) + { + continue; + } + } + + hasher.u32(type_id); + } + uint64_t types_hash = hasher.get(); + + auto function_name = to_name(func.self); + auto itr = function_overloads.find(function_name); + if (itr != end(function_overloads)) + { + // There exists a function with this name already. + auto &overloads = itr->second; + if (overloads.count(types_hash) != 0) + { + // Overload conflict, assign a new name. + add_resource_name(func.self); + function_overloads[to_name(func.self)].insert(types_hash); + } + else + { + // Can reuse the name. + overloads.insert(types_hash); + } + } + else + { + // First time we see this function name. + add_resource_name(func.self); + function_overloads[to_name(func.self)].insert(types_hash); + } +} + +void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) +{ + if (func.self != ir.default_entry_point) + add_function_overload(func); + + // Avoid shadow declarations. + local_variable_names = resource_names; + + string decl; + + auto &type = get<SPIRType>(func.return_type); + decl += flags_to_qualifiers_glsl(type, return_flags); + decl += type_to_glsl(type); + decl += type_to_array_glsl(type, 0); + decl += " "; + + if (func.self == ir.default_entry_point) + { + // If we need complex fallback in GLSL, we just wrap main() in a function + // and interlock the entire shader ... + if (interlocked_is_complex) + decl += "spvMainInterlockedBody"; + else + decl += "main"; + + processing_entry_point = true; + } + else + decl += to_name(func.self); + + decl += "("; + SmallVector<string> arglist; + for (auto &arg : func.arguments) + { + // Do not pass in separate images or samplers if we're remapping + // to combined image samplers. + if (skip_argument(arg.id)) + continue; + + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get<SPIRVariable>(arg.id); + if (var) + var->parameter = &arg; + } + + for (auto &arg : func.shadow_arguments) + { + // Might change the variable name if it already exists in this function. + // SPIRV OpName doesn't have any semantic effect, so it's valid for an implementation + // to use same name for variables. + // Since we want to make the GLSL debuggable and somewhat sane, use fallback names for variables which are duplicates. + add_local_variable_name(arg.id); + + arglist.push_back(argument_decl(arg)); + + // Hold a pointer to the parameter so we can invalidate the readonly field if needed. + auto *var = maybe_get<SPIRVariable>(arg.id); + if (var) + var->parameter = &arg; + } + + decl += merge(arglist); + decl += ")"; + statement(decl); +} + +void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) +{ + // Avoid potential cycles. + if (func.active) + return; + func.active = true; + + // If we depend on a function, emit that function before we emit our own function. + for (auto block : func.blocks) + { + auto &b = get<SPIRBlock>(block); + for (auto &i : b.ops) + { + auto ops = stream(i); + auto op = static_cast<Op>(i.op); + + if (op == OpFunctionCall) + { + // Recursively emit functions which are called. + uint32_t id = ops[2]; + emit_function(get<SPIRFunction>(id), ir.meta[ops[1]].decoration.decoration_flags); + } + } + } + + if (func.entry_line.file_id != 0) + emit_line_directive(func.entry_line.file_id, func.entry_line.line_literal); + emit_function_prototype(func, return_flags); + begin_scope(); + + if (func.self == ir.default_entry_point) + emit_entry_point_declarations(); + + current_function = &func; + auto &entry_block = get<SPIRBlock>(func.entry_block); + + sort(begin(func.constant_arrays_needed_on_stack), end(func.constant_arrays_needed_on_stack)); + for (auto &array : func.constant_arrays_needed_on_stack) + { + auto &c = get<SPIRConstant>(array); + auto &type = get<SPIRType>(c.constant_type); + statement(variable_decl(type, join("_", array, "_array_copy")), " = ", constant_expression(c), ";"); + } + + for (auto &v : func.local_variables) + { + auto &var = get<SPIRVariable>(v); + var.deferred_declaration = false; + + if (variable_decl_is_remapped_storage(var, StorageClassWorkgroup)) + { + // Special variable type which cannot have initializer, + // need to be declared as standalone variables. + // Comes from MSL which can push global variables as local variables in main function. + add_local_variable_name(var.self); + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } + else if (var.storage == StorageClassPrivate) + { + // These variables will not have had their CFG usage analyzed, so move it to the entry block. + // Comes from MSL which can push global variables as local variables in main function. + // We could just declare them right now, but we would miss out on an important initialization case which is + // LUT declaration in MSL. + // If we don't declare the variable when it is assigned we're forced to go through a helper function + // which copies elements one by one. + add_local_variable_name(var.self); + + if (var.initializer) + { + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } + else + { + auto &dominated = entry_block.dominated_variables; + if (find(begin(dominated), end(dominated), var.self) == end(dominated)) + entry_block.dominated_variables.push_back(var.self); + var.deferred_declaration = true; + } + } + else if (var.storage == StorageClassFunction && var.remapped_variable && var.static_expression) + { + // No need to declare this variable, it has a static expression. + var.deferred_declaration = false; + } + else if (expression_is_lvalue(v)) + { + add_local_variable_name(var.self); + + // Loop variables should never be declared early, they are explicitly emitted in a loop. + if (var.initializer && !var.loop_variable) + statement(variable_decl_function_local(var), ";"); + else + { + // Don't declare variable until first use to declutter the GLSL output quite a lot. + // If we don't touch the variable before first branch, + // declare it then since we need variable declaration to be in top scope. + var.deferred_declaration = true; + } + } + else + { + // HACK: SPIR-V in older glslang output likes to use samplers and images as local variables, but GLSL does not allow this. + // For these types (non-lvalue), we enforce forwarding through a shadowed variable. + // This means that when we OpStore to these variables, we just write in the expression ID directly. + // This breaks any kind of branching, since the variable must be statically assigned. + // Branching on samplers and images would be pretty much impossible to fake in GLSL. + var.statically_assigned = true; + } + + var.loop_variable_enable = false; + + // Loop variables are never declared outside their for-loop, so block any implicit declaration. + if (var.loop_variable) + { + var.deferred_declaration = false; + // Need to reset the static expression so we can fallback to initializer if need be. + var.static_expression = 0; + } + } + + // Enforce declaration order for regression testing purposes. + for (auto &block_id : func.blocks) + { + auto &block = get<SPIRBlock>(block_id); + sort(begin(block.dominated_variables), end(block.dominated_variables)); + } + + for (auto &line : current_function->fixup_hooks_in) + line(); + + emit_block_chain(entry_block); + + end_scope(); + processing_entry_point = false; + statement(""); + + // Make sure deferred declaration state for local variables is cleared when we are done with function. + // We risk declaring Private/Workgroup variables in places we are not supposed to otherwise. + for (auto &v : func.local_variables) + { + auto &var = get<SPIRVariable>(v); + var.deferred_declaration = false; + } +} + +void CompilerGLSL::emit_fixup() +{ + if (is_vertex_like_shader()) + { + if (options.vertex.fixup_clipspace) + { + const char *suffix = backend.float_literal_suffix ? "f" : ""; + statement("gl_Position.z = 2.0", suffix, " * gl_Position.z - gl_Position.w;"); + } + + if (options.vertex.flip_vert_y) + statement("gl_Position.y = -gl_Position.y;"); + } +} + +void CompilerGLSL::flush_phi(BlockID from, BlockID to) +{ + auto &child = get<SPIRBlock>(to); + if (child.ignore_phi_from_block == from) + return; + + unordered_set<uint32_t> temporary_phi_variables; + + for (auto itr = begin(child.phi_variables); itr != end(child.phi_variables); ++itr) + { + auto &phi = *itr; + + if (phi.parent == from) + { + auto &var = get<SPIRVariable>(phi.function_variable); + + // A Phi variable might be a loop variable, so flush to static expression. + if (var.loop_variable && !var.loop_variable_enable) + var.static_expression = phi.local_variable; + else + { + flush_variable_declaration(phi.function_variable); + + // Check if we are going to write to a Phi variable that another statement will read from + // as part of another Phi node in our target block. + // For this case, we will need to copy phi.function_variable to a temporary, and use that for future reads. + // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. + bool need_saved_temporary = + find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { + return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from; + }) != end(child.phi_variables); + + if (need_saved_temporary) + { + // Need to make sure we declare the phi variable with a copy at the right scope. + // We cannot safely declare a temporary here since we might be inside a continue block. + if (!var.allocate_temporary_copy) + { + var.allocate_temporary_copy = true; + force_recompile(); + } + statement("_", phi.function_variable, "_copy", " = ", to_name(phi.function_variable), ";"); + temporary_phi_variables.insert(phi.function_variable); + } + + // This might be called in continue block, so make sure we + // use this to emit ESSL 1.0 compliant increments/decrements. + auto lhs = to_expression(phi.function_variable); + + string rhs; + if (temporary_phi_variables.count(phi.local_variable)) + rhs = join("_", phi.local_variable, "_copy"); + else + rhs = to_pointer_expression(phi.local_variable); + + if (!optimize_read_modify_write(get<SPIRType>(var.basetype), lhs, rhs)) + statement(lhs, " = ", rhs, ";"); + } + + register_write(phi.function_variable); + } + } +} + +void CompilerGLSL::branch_to_continue(BlockID from, BlockID to) +{ + auto &to_block = get<SPIRBlock>(to); + if (from == to) + return; + + assert(is_continue(to)); + if (to_block.complex_continue) + { + // Just emit the whole block chain as is. + auto usage_counts = expression_usage_counts; + + emit_block_chain(to_block); + + // Expression usage counts are moot after returning from the continue block. + expression_usage_counts = usage_counts; + } + else + { + auto &from_block = get<SPIRBlock>(from); + bool outside_control_flow = false; + uint32_t loop_dominator = 0; + + // FIXME: Refactor this to not use the old loop_dominator tracking. + if (from_block.merge_block) + { + // If we are a loop header, we don't set the loop dominator, + // so just use "self" here. + loop_dominator = from; + } + else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator)) + { + loop_dominator = from_block.loop_dominator; + } + + if (loop_dominator != 0) + { + auto &cfg = get_cfg_for_current_function(); + + // For non-complex continue blocks, we implicitly branch to the continue block + // by having the continue block be part of the loop header in for (; ; continue-block). + outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(loop_dominator, from); + } + + // Some simplification for for-loops. We always end up with a useless continue; + // statement since we branch to a loop block. + // Walk the CFG, if we unconditionally execute the block calling continue assuming we're in the loop block, + // we can avoid writing out an explicit continue statement. + // Similar optimization to return statements if we know we're outside flow control. + if (!outside_control_flow) + statement("continue;"); + } +} + +void CompilerGLSL::branch(BlockID from, BlockID to) +{ + flush_phi(from, to); + flush_control_dependent_expressions(from); + + bool to_is_continue = is_continue(to); + + // This is only a continue if we branch to our loop dominator. + if ((ir.block_meta[to] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) != 0 && get<SPIRBlock>(from).loop_dominator == to) + { + // This can happen if we had a complex continue block which was emitted. + // Once the continue block tries to branch to the loop header, just emit continue; + // and end the chain here. + statement("continue;"); + } + else if (from != to && is_break(to)) + { + // We cannot break to ourselves, so check explicitly for from != to. + // This case can trigger if a loop header is all three of these things: + // - Continue block + // - Loop header + // - Break merge target all at once ... + + // Very dirty workaround. + // Switch constructs are able to break, but they cannot break out of a loop at the same time, + // yet SPIR-V allows it. + // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, + // write to the ladder here, and defer the break. + // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case. + if (is_loop_break(to)) + { + for (size_t n = current_emitting_switch_stack.size(); n; n--) + { + auto *current_emitting_switch = current_emitting_switch_stack[n - 1]; + + if (current_emitting_switch && + current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) && + get<SPIRBlock>(current_emitting_switch->loop_dominator).merge_block == to) + { + if (!current_emitting_switch->need_ladder_break) + { + force_recompile(); + current_emitting_switch->need_ladder_break = true; + } + + statement("_", current_emitting_switch->self, "_ladder_break = true;"); + } + else + break; + } + } + statement("break;"); + } + else if (to_is_continue || from == to) + { + // For from == to case can happen for a do-while loop which branches into itself. + // We don't mark these cases as continue blocks, but the only possible way to branch into + // ourselves is through means of continue blocks. + + // If we are merging to a continue block, there is no need to emit the block chain for continue here. + // We can branch to the continue block after we merge execution. + + // Here we make use of structured control flow rules from spec: + // 2.11: - the merge block declared by a header block cannot be a merge block declared by any other header block + // - each header block must strictly dominate its merge block, unless the merge block is unreachable in the CFG + // If we are branching to a merge block, we must be inside a construct which dominates the merge block. + auto &block_meta = ir.block_meta[to]; + bool branching_to_merge = + (block_meta & (ParsedIR::BLOCK_META_SELECTION_MERGE_BIT | ParsedIR::BLOCK_META_MULTISELECT_MERGE_BIT | + ParsedIR::BLOCK_META_LOOP_MERGE_BIT)) != 0; + if (!to_is_continue || !branching_to_merge) + branch_to_continue(from, to); + } + else if (!is_conditional(to)) + emit_block_chain(get<SPIRBlock>(to)); + + // It is important that we check for break before continue. + // A block might serve two purposes, a break block for the inner scope, and + // a continue block in the outer scope. + // Inner scope always takes precedence. +} + +void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block) +{ + auto &from_block = get<SPIRBlock>(from); + BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0); + + // If we branch directly to our selection merge target, we don't need a code path. + bool true_block_needs_code = true_block != merge_block || flush_phi_required(from, true_block); + bool false_block_needs_code = false_block != merge_block || flush_phi_required(from, false_block); + + if (!true_block_needs_code && !false_block_needs_code) + return; + + // We might have a loop merge here. Only consider selection flattening constructs. + // Loop hints are handled explicitly elsewhere. + if (from_block.hint == SPIRBlock::HintFlatten || from_block.hint == SPIRBlock::HintDontFlatten) + emit_block_hints(from_block); + + if (true_block_needs_code) + { + statement("if (", to_expression(cond), ")"); + begin_scope(); + branch(from, true_block); + end_scope(); + + if (false_block_needs_code) + { + statement("else"); + begin_scope(); + branch(from, false_block); + end_scope(); + } + } + else if (false_block_needs_code) + { + // Only need false path, use negative conditional. + statement("if (!", to_enclosed_expression(cond), ")"); + begin_scope(); + branch(from, false_block); + end_scope(); + } +} + +// FIXME: This currently cannot handle complex continue blocks +// as in do-while. +// This should be seen as a "trivial" continue block. +string CompilerGLSL::emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block) +{ + auto *block = &get<SPIRBlock>(continue_block); + + // While emitting the continue block, declare_temporary will check this + // if we have to emit temporaries. + current_continue_block = block; + + SmallVector<string> statements; + + // Capture all statements into our list. + auto *old = redirect_statement; + redirect_statement = &statements; + + // Stamp out all blocks one after each other. + while ((ir.block_meta[block->self] & ParsedIR::BLOCK_META_LOOP_HEADER_BIT) == 0) + { + // Write out all instructions we have in this block. + emit_block_instructions(*block); + + // For plain branchless for/while continue blocks. + if (block->next_block) + { + flush_phi(continue_block, block->next_block); + block = &get<SPIRBlock>(block->next_block); + } + // For do while blocks. The last block will be a select block. + else if (block->true_block && follow_true_block) + { + flush_phi(continue_block, block->true_block); + block = &get<SPIRBlock>(block->true_block); + } + else if (block->false_block && follow_false_block) + { + flush_phi(continue_block, block->false_block); + block = &get<SPIRBlock>(block->false_block); + } + else + { + SPIRV_CROSS_THROW("Invalid continue block detected!"); + } + } + + // Restore old pointer. + redirect_statement = old; + + // Somewhat ugly, strip off the last ';' since we use ',' instead. + // Ideally, we should select this behavior in statement(). + for (auto &s : statements) + { + if (!s.empty() && s.back() == ';') + s.erase(s.size() - 1, 1); + } + + current_continue_block = nullptr; + return merge(statements); +} + +void CompilerGLSL::emit_while_loop_initializers(const SPIRBlock &block) +{ + // While loops do not take initializers, so declare all of them outside. + for (auto &loop_var : block.loop_variables) + { + auto &var = get<SPIRVariable>(loop_var); + statement(variable_decl(var), ";"); + } +} + +string CompilerGLSL::emit_for_loop_initializers(const SPIRBlock &block) +{ + if (block.loop_variables.empty()) + return ""; + + bool same_types = for_loop_initializers_are_same_type(block); + // We can only declare for loop initializers if all variables are of same type. + // If we cannot do this, declare individual variables before the loop header. + + // We might have a loop variable candidate which was not assigned to for some reason. + uint32_t missing_initializers = 0; + for (auto &variable : block.loop_variables) + { + uint32_t expr = get<SPIRVariable>(variable).static_expression; + + // Sometimes loop variables are initialized with OpUndef, but we can just declare + // a plain variable without initializer in this case. + if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) + missing_initializers++; + } + + if (block.loop_variables.size() == 1 && missing_initializers == 0) + { + return variable_decl(get<SPIRVariable>(block.loop_variables.front())); + } + else if (!same_types || missing_initializers == uint32_t(block.loop_variables.size())) + { + for (auto &loop_var : block.loop_variables) + statement(variable_decl(get<SPIRVariable>(loop_var)), ";"); + return ""; + } + else + { + // We have a mix of loop variables, either ones with a clear initializer, or ones without. + // Separate the two streams. + string expr; + + for (auto &loop_var : block.loop_variables) + { + uint32_t static_expr = get<SPIRVariable>(loop_var).static_expression; + if (static_expr == 0 || ir.ids[static_expr].get_type() == TypeUndef) + { + statement(variable_decl(get<SPIRVariable>(loop_var)), ";"); + } + else + { + auto &var = get<SPIRVariable>(loop_var); + auto &type = get_variable_data_type(var); + if (expr.empty()) + { + // For loop initializers are of the form <type id = value, id = value, id = value, etc ... + expr = join(to_qualifiers_glsl(var.self), type_to_glsl(type), " "); + } + else + { + expr += ", "; + // In MSL, being based on C++, the asterisk marking a pointer + // binds to the identifier, not the type. + if (type.pointer) + expr += "* "; + } + + expr += join(to_name(loop_var), " = ", to_pointer_expression(var.static_expression)); + } + } + return expr; + } +} + +bool CompilerGLSL::for_loop_initializers_are_same_type(const SPIRBlock &block) +{ + if (block.loop_variables.size() <= 1) + return true; + + uint32_t expected = 0; + Bitset expected_flags; + for (auto &var : block.loop_variables) + { + // Don't care about uninitialized variables as they will not be part of the initializers. + uint32_t expr = get<SPIRVariable>(var).static_expression; + if (expr == 0 || ir.ids[expr].get_type() == TypeUndef) + continue; + + if (expected == 0) + { + expected = get<SPIRVariable>(var).basetype; + expected_flags = get_decoration_bitset(var); + } + else if (expected != get<SPIRVariable>(var).basetype) + return false; + + // Precision flags and things like that must also match. + if (expected_flags != get_decoration_bitset(var)) + return false; + } + + return true; +} + +void CompilerGLSL::emit_block_instructions_with_masked_debug(SPIRBlock &block) +{ + // Have to block debug instructions such as OpLine here, since it will be treated as a statement otherwise, + // which breaks loop optimizations. + // Any line directive would be declared outside the loop body, which would just be confusing either way. + bool old_block_debug_directives = block_debug_directives; + block_debug_directives = true; + emit_block_instructions(block); + block_debug_directives = old_block_debug_directives; +} + +bool CompilerGLSL::attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method) +{ + SPIRBlock::ContinueBlockType continue_type = continue_block_type(get<SPIRBlock>(block.continue_block)); + + if (method == SPIRBlock::MergeToSelectForLoop || method == SPIRBlock::MergeToSelectContinueForLoop) + { + uint32_t current_count = statement_count; + // If we're trying to create a true for loop, + // we need to make sure that all opcodes before branch statement do not actually emit any code. + // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. + emit_block_instructions_with_masked_debug(block); + + bool condition_is_temporary = forced_temporaries.find(block.condition) == end(forced_temporaries); + + bool flushes_phi = flush_phi_required(block.self, block.true_block) || + flush_phi_required(block.self, block.false_block); + + // This can work! We only did trivial things which could be forwarded in block body! + if (!flushes_phi && current_count == statement_count && condition_is_temporary) + { + switch (continue_type) + { + case SPIRBlock::ForLoop: + { + // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. + flush_undeclared_variables(block); + + // Important that we do this in this order because + // emitting the continue block can invalidate the condition expression. + auto initializer = emit_for_loop_initializers(block); + auto condition = to_expression(block.condition); + + // Condition might have to be inverted. + if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block))) + condition = join("!", enclose_expression(condition)); + + emit_block_hints(block); + if (method != SPIRBlock::MergeToSelectContinueForLoop) + { + auto continue_block = emit_continue_block(block.continue_block, false, false); + statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); + } + else + statement("for (", initializer, "; ", condition, "; )"); + break; + } + + case SPIRBlock::WhileLoop: + { + // This block may be a dominating block, so make sure we flush undeclared variables before building the while loop header. + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emit_block_hints(block); + + auto condition = to_expression(block.condition); + // Condition might have to be inverted. + if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block))) + condition = join("!", enclose_expression(condition)); + + statement("while (", condition, ")"); + break; + } + + default: + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; + } + + begin_scope(); + return true; + } + else + { + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; + } + } + else if (method == SPIRBlock::MergeToDirectForLoop) + { + auto &child = get<SPIRBlock>(block.next_block); + + // This block may be a dominating block, so make sure we flush undeclared variables before building the for loop header. + flush_undeclared_variables(child); + + uint32_t current_count = statement_count; + + // If we're trying to create a true for loop, + // we need to make sure that all opcodes before branch statement do not actually emit any code. + // We can then take the condition expression and create a for (; cond ; ) { body; } structure instead. + emit_block_instructions_with_masked_debug(child); + + bool condition_is_temporary = forced_temporaries.find(child.condition) == end(forced_temporaries); + + bool flushes_phi = flush_phi_required(child.self, child.true_block) || + flush_phi_required(child.self, child.false_block); + + if (!flushes_phi && current_count == statement_count && condition_is_temporary) + { + uint32_t target_block = child.true_block; + + switch (continue_type) + { + case SPIRBlock::ForLoop: + { + // Important that we do this in this order because + // emitting the continue block can invalidate the condition expression. + auto initializer = emit_for_loop_initializers(block); + auto condition = to_expression(child.condition); + + // Condition might have to be inverted. + if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block))) + { + condition = join("!", enclose_expression(condition)); + target_block = child.false_block; + } + + auto continue_block = emit_continue_block(block.continue_block, false, false); + emit_block_hints(block); + statement("for (", initializer, "; ", condition, "; ", continue_block, ")"); + break; + } + + case SPIRBlock::WhileLoop: + { + emit_while_loop_initializers(block); + emit_block_hints(block); + + auto condition = to_expression(child.condition); + // Condition might have to be inverted. + if (execution_is_noop(get<SPIRBlock>(child.true_block), get<SPIRBlock>(block.merge_block))) + { + condition = join("!", enclose_expression(condition)); + target_block = child.false_block; + } + + statement("while (", condition, ")"); + break; + } + + default: + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; + } + + begin_scope(); + branch(child.self, target_block); + return true; + } + else + { + block.disable_block_optimization = true; + force_recompile(); + begin_scope(); // We'll see an end_scope() later. + return false; + } + } + else + return false; +} + +void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) +{ + for (auto &v : block.dominated_variables) + flush_variable_declaration(v); +} + +void CompilerGLSL::emit_hoisted_temporaries(SmallVector<pair<TypeID, ID>> &temporaries) +{ + // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. + // Need to sort these to ensure that reference output is stable. + sort(begin(temporaries), end(temporaries), + [](const pair<TypeID, ID> &a, const pair<TypeID, ID> &b) { return a.second < b.second; }); + + for (auto &tmp : temporaries) + { + auto &type = get<SPIRType>(tmp.first); + + // There are some rare scenarios where we are asked to declare pointer types as hoisted temporaries. + // This should be ignored unless we're doing actual variable pointers and backend supports it. + // Access chains cannot normally be lowered to temporaries in GLSL and HLSL. + if (type.pointer && !backend.native_pointers) + continue; + + add_local_variable_name(tmp.second); + auto &flags = get_decoration_bitset(tmp.second); + + // Not all targets support pointer literals, so don't bother with that case. + string initializer; + if (options.force_zero_initialized_variables && type_can_zero_initialize(type)) + initializer = join(" = ", to_zero_initialized_expression(tmp.first)); + + statement(flags_to_qualifiers_glsl(type, flags), variable_decl(type, to_name(tmp.second)), initializer, ";"); + + hoisted_temporaries.insert(tmp.second); + forced_temporaries.insert(tmp.second); + + // The temporary might be read from before it's assigned, set up the expression now. + set<SPIRExpression>(tmp.second, to_name(tmp.second), tmp.first, true); + + // If we have hoisted temporaries in multi-precision contexts, emit that here too ... + // We will not be able to analyze hoisted-ness for dependent temporaries that we hallucinate here. + auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(tmp.second); + if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end()) + { + uint32_t mirror_id = mirrored_precision_itr->second; + auto &mirror_flags = get_decoration_bitset(mirror_id); + statement(flags_to_qualifiers_glsl(type, mirror_flags), + variable_decl(type, to_name(mirror_id)), + initializer, ";"); + // The temporary might be read from before it's assigned, set up the expression now. + set<SPIRExpression>(mirror_id, to_name(mirror_id), tmp.first, true); + hoisted_temporaries.insert(mirror_id); + } + } +} + +void CompilerGLSL::emit_block_chain(SPIRBlock &block) +{ + bool select_branch_to_true_block = false; + bool select_branch_to_false_block = false; + bool skip_direct_branch = false; + bool emitted_loop_header_variables = false; + bool force_complex_continue_block = false; + ValueSaver<uint32_t> loop_level_saver(current_loop_level); + + if (block.merge == SPIRBlock::MergeLoop) + add_loop_level(); + + // If we're emitting PHI variables with precision aliases, we have to emit them as hoisted temporaries. + for (auto var_id : block.dominated_variables) + { + auto &var = get<SPIRVariable>(var_id); + if (var.phi_variable) + { + auto mirrored_precision_itr = temporary_to_mirror_precision_alias.find(var_id); + if (mirrored_precision_itr != temporary_to_mirror_precision_alias.end() && + find_if(block.declare_temporary.begin(), block.declare_temporary.end(), + [mirrored_precision_itr](const std::pair<TypeID, VariableID> &p) { + return p.second == mirrored_precision_itr->second; + }) == block.declare_temporary.end()) + { + block.declare_temporary.push_back({ var.basetype, mirrored_precision_itr->second }); + } + } + } + + emit_hoisted_temporaries(block.declare_temporary); + + SPIRBlock::ContinueBlockType continue_type = SPIRBlock::ContinueNone; + if (block.continue_block) + { + continue_type = continue_block_type(get<SPIRBlock>(block.continue_block)); + // If we know we cannot emit a loop, mark the block early as a complex loop so we don't force unnecessary recompiles. + if (continue_type == SPIRBlock::ComplexLoop) + block.complex_continue = true; + } + + // If we have loop variables, stop masking out access to the variable now. + for (auto var_id : block.loop_variables) + { + auto &var = get<SPIRVariable>(var_id); + var.loop_variable_enable = true; + // We're not going to declare the variable directly, so emit a copy here. + emit_variable_temporary_copies(var); + } + + // Remember deferred declaration state. We will restore it before returning. + SmallVector<bool, 64> rearm_dominated_variables(block.dominated_variables.size()); + for (size_t i = 0; i < block.dominated_variables.size(); i++) + { + uint32_t var_id = block.dominated_variables[i]; + auto &var = get<SPIRVariable>(var_id); + rearm_dominated_variables[i] = var.deferred_declaration; + } + + // This is the method often used by spirv-opt to implement loops. + // The loop header goes straight into the continue block. + // However, don't attempt this on ESSL 1.0, because if a loop variable is used in a continue block, + // it *MUST* be used in the continue block. This loop method will not work. + if (!is_legacy_es() && block_is_loop_candidate(block, SPIRBlock::MergeToSelectContinueForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectContinueForLoop)) + { + if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block))) + select_branch_to_false_block = true; + else + select_branch_to_true_block = true; + + emitted_loop_header_variables = true; + force_complex_continue_block = true; + } + } + // This is the older loop behavior in glslang which branches to loop body directly from the loop header. + else if (block_is_loop_candidate(block, SPIRBlock::MergeToSelectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToSelectForLoop)) + { + // The body of while, is actually just the true (or false) block, so always branch there unconditionally. + if (execution_is_noop(get<SPIRBlock>(block.true_block), get<SPIRBlock>(block.merge_block))) + select_branch_to_false_block = true; + else + select_branch_to_true_block = true; + + emitted_loop_header_variables = true; + } + } + // This is the newer loop behavior in glslang which branches from Loop header directly to + // a new block, which in turn has a OpBranchSelection without a selection merge. + else if (block_is_loop_candidate(block, SPIRBlock::MergeToDirectForLoop)) + { + flush_undeclared_variables(block); + if (attempt_emit_loop_header(block, SPIRBlock::MergeToDirectForLoop)) + { + skip_direct_branch = true; + emitted_loop_header_variables = true; + } + } + else if (continue_type == SPIRBlock::DoWhileLoop) + { + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emitted_loop_header_variables = true; + // We have some temporaries where the loop header is the dominator. + // We risk a case where we have code like: + // for (;;) { create-temporary; break; } consume-temporary; + // so force-declare temporaries here. + emit_hoisted_temporaries(block.potential_declare_temporary); + statement("do"); + begin_scope(); + + emit_block_instructions(block); + } + else if (block.merge == SPIRBlock::MergeLoop) + { + flush_undeclared_variables(block); + emit_while_loop_initializers(block); + emitted_loop_header_variables = true; + + // We have a generic loop without any distinguishable pattern like for, while or do while. + get<SPIRBlock>(block.continue_block).complex_continue = true; + continue_type = SPIRBlock::ComplexLoop; + + // We have some temporaries where the loop header is the dominator. + // We risk a case where we have code like: + // for (;;) { create-temporary; break; } consume-temporary; + // so force-declare temporaries here. + emit_hoisted_temporaries(block.potential_declare_temporary); + emit_block_hints(block); + statement("for (;;)"); + begin_scope(); + + emit_block_instructions(block); + } + else + { + emit_block_instructions(block); + } + + // If we didn't successfully emit a loop header and we had loop variable candidates, we have a problem + // as writes to said loop variables might have been masked out, we need a recompile. + if (!emitted_loop_header_variables && !block.loop_variables.empty()) + { + force_recompile_guarantee_forward_progress(); + for (auto var : block.loop_variables) + get<SPIRVariable>(var).loop_variable = false; + block.loop_variables.clear(); + } + + flush_undeclared_variables(block); + bool emit_next_block = true; + + // Handle end of block. + switch (block.terminator) + { + case SPIRBlock::Direct: + // True when emitting complex continue block. + if (block.loop_dominator == block.next_block) + { + branch(block.self, block.next_block); + emit_next_block = false; + } + // True if MergeToDirectForLoop succeeded. + else if (skip_direct_branch) + emit_next_block = false; + else if (is_continue(block.next_block) || is_break(block.next_block) || is_conditional(block.next_block)) + { + branch(block.self, block.next_block); + emit_next_block = false; + } + break; + + case SPIRBlock::Select: + // True if MergeToSelectForLoop or MergeToSelectContinueForLoop succeeded. + if (select_branch_to_true_block) + { + if (force_complex_continue_block) + { + assert(block.true_block == block.continue_block); + + // We're going to emit a continue block directly here, so make sure it's marked as complex. + auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue; + bool old_complex = complex_continue; + complex_continue = true; + branch(block.self, block.true_block); + complex_continue = old_complex; + } + else + branch(block.self, block.true_block); + } + else if (select_branch_to_false_block) + { + if (force_complex_continue_block) + { + assert(block.false_block == block.continue_block); + + // We're going to emit a continue block directly here, so make sure it's marked as complex. + auto &complex_continue = get<SPIRBlock>(block.continue_block).complex_continue; + bool old_complex = complex_continue; + complex_continue = true; + branch(block.self, block.false_block); + complex_continue = old_complex; + } + else + branch(block.self, block.false_block); + } + else + branch(block.self, block.condition, block.true_block, block.false_block); + break; + + case SPIRBlock::MultiSelect: + { + auto &type = expression_type(block.condition); + bool unsigned_case = type.basetype == SPIRType::UInt || type.basetype == SPIRType::UShort || + type.basetype == SPIRType::UByte || type.basetype == SPIRType::UInt64; + + if (block.merge == SPIRBlock::MergeNone) + SPIRV_CROSS_THROW("Switch statement is not structured"); + + if (!backend.support_64bit_switch && (type.basetype == SPIRType::UInt64 || type.basetype == SPIRType::Int64)) + { + // SPIR-V spec suggests this is allowed, but we cannot support it in higher level languages. + SPIRV_CROSS_THROW("Cannot use 64-bit switch selectors."); + } + + const char *label_suffix = ""; + if (type.basetype == SPIRType::UInt && backend.uint32_t_literal_suffix) + label_suffix = "u"; + else if (type.basetype == SPIRType::Int64 && backend.support_64bit_switch) + label_suffix = "l"; + else if (type.basetype == SPIRType::UInt64 && backend.support_64bit_switch) + label_suffix = "ul"; + else if (type.basetype == SPIRType::UShort) + label_suffix = backend.uint16_t_literal_suffix; + else if (type.basetype == SPIRType::Short) + label_suffix = backend.int16_t_literal_suffix; + + current_emitting_switch_stack.push_back(&block); + + if (block.need_ladder_break) + statement("bool _", block.self, "_ladder_break = false;"); + + // Find all unique case constructs. + unordered_map<uint32_t, SmallVector<uint64_t>> case_constructs; + SmallVector<uint32_t> block_declaration_order; + SmallVector<uint64_t> literals_to_merge; + + // If a switch case branches to the default block for some reason, we can just remove that literal from consideration + // and let the default: block handle it. + // 2.11 in SPIR-V spec states that for fall-through cases, there is a very strict declaration order which we can take advantage of here. + // We only need to consider possible fallthrough if order[i] branches to order[i + 1]. + auto &cases = get_case_list(block); + for (auto &c : cases) + { + if (c.block != block.next_block && c.block != block.default_block) + { + if (!case_constructs.count(c.block)) + block_declaration_order.push_back(c.block); + case_constructs[c.block].push_back(c.value); + } + else if (c.block == block.next_block && block.default_block != block.next_block) + { + // We might have to flush phi inside specific case labels. + // If we can piggyback on default:, do so instead. + literals_to_merge.push_back(c.value); + } + } + + // Empty literal array -> default. + if (block.default_block != block.next_block) + { + auto &default_block = get<SPIRBlock>(block.default_block); + + // We need to slide in the default block somewhere in this chain + // if there are fall-through scenarios since the default is declared separately in OpSwitch. + // Only consider trivial fall-through cases here. + size_t num_blocks = block_declaration_order.size(); + bool injected_block = false; + + for (size_t i = 0; i < num_blocks; i++) + { + auto &case_block = get<SPIRBlock>(block_declaration_order[i]); + if (execution_is_direct_branch(case_block, default_block)) + { + // Fallthrough to default block, we must inject the default block here. + block_declaration_order.insert(begin(block_declaration_order) + i + 1, block.default_block); + injected_block = true; + break; + } + else if (execution_is_direct_branch(default_block, case_block)) + { + // Default case is falling through to another case label, we must inject the default block here. + block_declaration_order.insert(begin(block_declaration_order) + i, block.default_block); + injected_block = true; + break; + } + } + + // Order does not matter. + if (!injected_block) + block_declaration_order.push_back(block.default_block); + else if (is_legacy_es()) + SPIRV_CROSS_THROW("Default case label fallthrough to other case label is not supported in ESSL 1.0."); + + case_constructs[block.default_block] = {}; + } + + size_t num_blocks = block_declaration_order.size(); + + const auto to_case_label = [](uint64_t literal, uint32_t width, bool is_unsigned_case) -> string + { + if (is_unsigned_case) + return convert_to_string(literal); + + // For smaller cases, the literals are compiled as 32 bit wide + // literals so we don't need to care for all sizes specifically. + if (width <= 32) + { + return convert_to_string(int64_t(int32_t(literal))); + } + + return convert_to_string(int64_t(literal)); + }; + + const auto to_legacy_case_label = [&](uint32_t condition, const SmallVector<uint64_t> &labels, + const char *suffix) -> string { + string ret; + size_t count = labels.size(); + for (size_t i = 0; i < count; i++) + { + if (i) + ret += " || "; + ret += join(count > 1 ? "(" : "", to_enclosed_expression(condition), " == ", labels[i], suffix, + count > 1 ? ")" : ""); + } + return ret; + }; + + // We need to deal with a complex scenario for OpPhi. If we have case-fallthrough and Phi in the picture, + // we need to flush phi nodes outside the switch block in a branch, + // and skip any Phi handling inside the case label to make fall-through work as expected. + // This kind of code-gen is super awkward and it's a last resort. Normally we would want to handle this + // inside the case label if at all possible. + for (size_t i = 1; backend.support_case_fallthrough && i < num_blocks; i++) + { + if (flush_phi_required(block.self, block_declaration_order[i]) && + flush_phi_required(block_declaration_order[i - 1], block_declaration_order[i])) + { + uint32_t target_block = block_declaration_order[i]; + + // Make sure we flush Phi, it might have been marked to be ignored earlier. + get<SPIRBlock>(target_block).ignore_phi_from_block = 0; + + auto &literals = case_constructs[target_block]; + + if (literals.empty()) + { + // Oh boy, gotta make a complete negative test instead! o.o + // Find all possible literals that would *not* make us enter the default block. + // If none of those literals match, we flush Phi ... + SmallVector<string> conditions; + for (size_t j = 0; j < num_blocks; j++) + { + auto &negative_literals = case_constructs[block_declaration_order[j]]; + for (auto &case_label : negative_literals) + conditions.push_back(join(to_enclosed_expression(block.condition), + " != ", to_case_label(case_label, type.width, unsigned_case))); + } + + statement("if (", merge(conditions, " && "), ")"); + begin_scope(); + flush_phi(block.self, target_block); + end_scope(); + } + else + { + SmallVector<string> conditions; + conditions.reserve(literals.size()); + for (auto &case_label : literals) + conditions.push_back(join(to_enclosed_expression(block.condition), + " == ", to_case_label(case_label, type.width, unsigned_case))); + statement("if (", merge(conditions, " || "), ")"); + begin_scope(); + flush_phi(block.self, target_block); + end_scope(); + } + + // Mark the block so that we don't flush Phi from header to case label. + get<SPIRBlock>(target_block).ignore_phi_from_block = block.self; + } + } + + // If there is only one default block, and no cases, this is a case where SPIRV-opt decided to emulate + // non-structured exits with the help of a switch block. + // This is buggy on FXC, so just emit the logical equivalent of a do { } while(false), which is more idiomatic. + bool block_like_switch = cases.empty(); + + // If this is true, the switch is completely meaningless, and we should just avoid it. + bool collapsed_switch = block_like_switch && block.default_block == block.next_block; + + if (!collapsed_switch) + { + if (block_like_switch || is_legacy_es()) + { + // ESSL 1.0 is not guaranteed to support do/while. + if (is_legacy_es()) + { + uint32_t counter = statement_count; + statement("for (int spvDummy", counter, " = 0; spvDummy", counter, " < 1; spvDummy", counter, + "++)"); + } + else + statement("do"); + } + else + { + emit_block_hints(block); + statement("switch (", to_unpacked_expression(block.condition), ")"); + } + begin_scope(); + } + + for (size_t i = 0; i < num_blocks; i++) + { + uint32_t target_block = block_declaration_order[i]; + auto &literals = case_constructs[target_block]; + + if (literals.empty()) + { + // Default case. + if (!block_like_switch) + { + if (is_legacy_es()) + statement("else"); + else + statement("default:"); + } + } + else + { + if (is_legacy_es()) + { + statement((i ? "else " : ""), "if (", to_legacy_case_label(block.condition, literals, label_suffix), + ")"); + } + else + { + for (auto &case_literal : literals) + { + // The case label value must be sign-extended properly in SPIR-V, so we can assume 32-bit values here. + statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":"); + } + } + } + + auto &case_block = get<SPIRBlock>(target_block); + if (backend.support_case_fallthrough && i + 1 < num_blocks && + execution_is_direct_branch(case_block, get<SPIRBlock>(block_declaration_order[i + 1]))) + { + // We will fall through here, so just terminate the block chain early. + // We still need to deal with Phi potentially. + // No need for a stack-like thing here since we only do fall-through when there is a + // single trivial branch to fall-through target.. + current_emitting_switch_fallthrough = true; + } + else + current_emitting_switch_fallthrough = false; + + if (!block_like_switch) + begin_scope(); + branch(block.self, target_block); + if (!block_like_switch) + end_scope(); + + current_emitting_switch_fallthrough = false; + } + + // Might still have to flush phi variables if we branch from loop header directly to merge target. + // This is supposed to emit all cases where we branch from header to merge block directly. + // There are two main scenarios where cannot rely on default fallthrough. + // - There is an explicit default: label already. + // In this case, literals_to_merge need to form their own "default" case, so that we avoid executing that block. + // - Header -> Merge requires flushing PHI. In this case, we need to collect all cases and flush PHI there. + bool header_merge_requires_phi = flush_phi_required(block.self, block.next_block); + bool need_fallthrough_block = block.default_block == block.next_block || !literals_to_merge.empty(); + if (!collapsed_switch && ((header_merge_requires_phi && need_fallthrough_block) || !literals_to_merge.empty())) + { + for (auto &case_literal : literals_to_merge) + statement("case ", to_case_label(case_literal, type.width, unsigned_case), label_suffix, ":"); + + if (block.default_block == block.next_block) + { + if (is_legacy_es()) + statement("else"); + else + statement("default:"); + } + + begin_scope(); + flush_phi(block.self, block.next_block); + statement("break;"); + end_scope(); + } + + if (!collapsed_switch) + { + if (block_like_switch && !is_legacy_es()) + end_scope_decl("while(false)"); + else + end_scope(); + } + else + flush_phi(block.self, block.next_block); + + if (block.need_ladder_break) + { + statement("if (_", block.self, "_ladder_break)"); + begin_scope(); + statement("break;"); + end_scope(); + } + + current_emitting_switch_stack.pop_back(); + break; + } + + case SPIRBlock::Return: + { + for (auto &line : current_function->fixup_hooks_out) + line(); + + if (processing_entry_point) + emit_fixup(); + + auto &cfg = get_cfg_for_current_function(); + + if (block.return_value) + { + auto &type = expression_type(block.return_value); + if (!type.array.empty() && !backend.can_return_array) + { + // If we cannot return arrays, we will have a special out argument we can write to instead. + // The backend is responsible for setting this up, and redirection the return values as appropriate. + if (ir.ids[block.return_value].get_type() != TypeUndef) + { + emit_array_copy("spvReturnValue", 0, block.return_value, StorageClassFunction, + get_expression_effective_storage_class(block.return_value)); + } + + if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) + { + statement("return;"); + } + } + else + { + // OpReturnValue can return Undef, so don't emit anything for this case. + if (ir.ids[block.return_value].get_type() != TypeUndef) + statement("return ", to_unpacked_expression(block.return_value), ";"); + } + } + else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) + { + // If this block is the very final block and not called from control flow, + // we do not need an explicit return which looks out of place. Just end the function here. + // In the very weird case of for(;;) { return; } executing return is unconditional, + // but we actually need a return here ... + statement("return;"); + } + break; + } + + // If the Kill is terminating a block with a (probably synthetic) return value, emit a return value statement. + case SPIRBlock::Kill: + statement(backend.discard_literal, ";"); + if (block.return_value) + statement("return ", to_unpacked_expression(block.return_value), ";"); + break; + + case SPIRBlock::Unreachable: + { + // Avoid emitting false fallthrough, which can happen for + // if (cond) break; else discard; inside a case label. + // Discard is not always implementable as a terminator. + + auto &cfg = get_cfg_for_current_function(); + bool inner_dominator_is_switch = false; + ID id = block.self; + + while (id) + { + auto &iter_block = get<SPIRBlock>(id); + if (iter_block.terminator == SPIRBlock::MultiSelect || + iter_block.merge == SPIRBlock::MergeLoop) + { + ID next_block = iter_block.merge == SPIRBlock::MergeLoop ? + iter_block.merge_block : iter_block.next_block; + bool outside_construct = next_block && cfg.find_common_dominator(next_block, block.self) == next_block; + if (!outside_construct) + { + inner_dominator_is_switch = iter_block.terminator == SPIRBlock::MultiSelect; + break; + } + } + + if (cfg.get_preceding_edges(id).empty()) + break; + + id = cfg.get_immediate_dominator(id); + } + + if (inner_dominator_is_switch) + statement("break; // unreachable workaround"); + + emit_next_block = false; + break; + } + + case SPIRBlock::IgnoreIntersection: + statement("ignoreIntersectionEXT;"); + break; + + case SPIRBlock::TerminateRay: + statement("terminateRayEXT;"); + break; + + case SPIRBlock::EmitMeshTasks: + emit_mesh_tasks(block); + break; + + default: + SPIRV_CROSS_THROW("Unimplemented block terminator."); + } + + if (block.next_block && emit_next_block) + { + // If we hit this case, we're dealing with an unconditional branch, which means we will output + // that block after this. If we had selection merge, we already flushed phi variables. + if (block.merge != SPIRBlock::MergeSelection) + { + flush_phi(block.self, block.next_block); + // For a direct branch, need to remember to invalidate expressions in the next linear block instead. + get<SPIRBlock>(block.next_block).invalidate_expressions = block.invalidate_expressions; + } + + // For switch fallthrough cases, we terminate the chain here, but we still need to handle Phi. + if (!current_emitting_switch_fallthrough) + { + // For merge selects we might have ignored the fact that a merge target + // could have been a break; or continue; + // We will need to deal with it here. + if (is_loop_break(block.next_block)) + { + // Cannot check for just break, because switch statements will also use break. + assert(block.merge == SPIRBlock::MergeSelection); + statement("break;"); + } + else if (is_continue(block.next_block)) + { + assert(block.merge == SPIRBlock::MergeSelection); + branch_to_continue(block.self, block.next_block); + } + else if (BlockID(block.self) != block.next_block) + emit_block_chain(get<SPIRBlock>(block.next_block)); + } + } + + if (block.merge == SPIRBlock::MergeLoop) + { + if (continue_type == SPIRBlock::DoWhileLoop) + { + // Make sure that we run the continue block to get the expressions set, but this + // should become an empty string. + // We have no fallbacks if we cannot forward everything to temporaries ... + const auto &continue_block = get<SPIRBlock>(block.continue_block); + bool positive_test = execution_is_noop(get<SPIRBlock>(continue_block.true_block), + get<SPIRBlock>(continue_block.loop_dominator)); + + uint32_t current_count = statement_count; + auto statements = emit_continue_block(block.continue_block, positive_test, !positive_test); + if (statement_count != current_count) + { + // The DoWhile block has side effects, force ComplexLoop pattern next pass. + get<SPIRBlock>(block.continue_block).complex_continue = true; + force_recompile(); + } + + // Might have to invert the do-while test here. + auto condition = to_expression(continue_block.condition); + if (!positive_test) + condition = join("!", enclose_expression(condition)); + + end_scope_decl(join("while (", condition, ")")); + } + else + end_scope(); + + loop_level_saver.release(); + + // We cannot break out of two loops at once, so don't check for break; here. + // Using block.self as the "from" block isn't quite right, but it has the same scope + // and dominance structure, so it's fine. + if (is_continue(block.merge_block)) + branch_to_continue(block.self, block.merge_block); + else + emit_block_chain(get<SPIRBlock>(block.merge_block)); + } + + // Forget about control dependent expressions now. + block.invalidate_expressions.clear(); + + // After we return, we must be out of scope, so if we somehow have to re-emit this function, + // re-declare variables if necessary. + assert(rearm_dominated_variables.size() == block.dominated_variables.size()); + for (size_t i = 0; i < block.dominated_variables.size(); i++) + { + uint32_t var = block.dominated_variables[i]; + get<SPIRVariable>(var).deferred_declaration = rearm_dominated_variables[i]; + } + + // Just like for deferred declaration, we need to forget about loop variable enable + // if our block chain is reinstantiated later. + for (auto &var_id : block.loop_variables) + get<SPIRVariable>(var_id).loop_variable_enable = false; +} + +void CompilerGLSL::begin_scope() +{ + statement("{"); + indent++; +} + +void CompilerGLSL::end_scope() +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("}"); +} + +void CompilerGLSL::end_scope(const string &trailer) +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("}", trailer); +} + +void CompilerGLSL::end_scope_decl() +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("};"); +} + +void CompilerGLSL::end_scope_decl(const string &decl) +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("} ", decl, ";"); +} + +void CompilerGLSL::check_function_call_constraints(const uint32_t *args, uint32_t length) +{ + // If our variable is remapped, and we rely on type-remapping information as + // well, then we cannot pass the variable as a function parameter. + // Fixing this is non-trivial without stamping out variants of the same function, + // so for now warn about this and suggest workarounds instead. + for (uint32_t i = 0; i < length; i++) + { + auto *var = maybe_get<SPIRVariable>(args[i]); + if (!var || !var->remapped_variable) + continue; + + auto &type = get<SPIRType>(var->basetype); + if (type.basetype == SPIRType::Image && type.image.dim == DimSubpassData) + { + SPIRV_CROSS_THROW("Tried passing a remapped subpassInput variable to a function. " + "This will not work correctly because type-remapping information is lost. " + "To workaround, please consider not passing the subpass input as a function parameter, " + "or use in/out variables instead which do not need type remapping information."); + } + } +} + +const Instruction *CompilerGLSL::get_next_instruction_in_block(const Instruction &instr) +{ + // FIXME: This is kind of hacky. There should be a cleaner way. + auto offset = uint32_t(&instr - current_emitting_block->ops.data()); + if ((offset + 1) < current_emitting_block->ops.size()) + return ¤t_emitting_block->ops[offset + 1]; + else + return nullptr; +} + +uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) +{ + return semantics & (MemorySemanticsAtomicCounterMemoryMask | MemorySemanticsImageMemoryMask | + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsUniformMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); +} + +bool CompilerGLSL::emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id, StorageClass, StorageClass) +{ + string lhs; + if (expr) + lhs = expr; + else + lhs = to_expression(lhs_id); + + statement(lhs, " = ", to_expression(rhs_id), ";"); + return true; +} + +bool CompilerGLSL::unroll_array_to_complex_store(uint32_t target_id, uint32_t source_id) +{ + if (!backend.force_gl_in_out_block) + return false; + // This path is only relevant for GL backends. + + auto *var = maybe_get<SPIRVariable>(target_id); + if (!var || var->storage != StorageClassOutput) + return false; + + if (!is_builtin_variable(*var) || BuiltIn(get_decoration(var->self, DecorationBuiltIn)) != BuiltInSampleMask) + return false; + + auto &type = expression_type(source_id); + string array_expr; + if (type.array_size_literal.back()) + { + array_expr = convert_to_string(type.array.back()); + if (type.array.back() == 0) + SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); + } + else + array_expr = to_expression(type.array.back()); + + SPIRType target_type { OpTypeInt }; + target_type.basetype = SPIRType::Int; + + statement("for (int i = 0; i < int(", array_expr, "); i++)"); + begin_scope(); + statement(to_expression(target_id), "[i] = ", + bitcast_expression(target_type, type.basetype, join(to_expression(source_id), "[i]")), + ";"); + end_scope(); + + return true; +} + +void CompilerGLSL::unroll_array_from_complex_load(uint32_t target_id, uint32_t source_id, std::string &expr) +{ + if (!backend.force_gl_in_out_block) + return; + // This path is only relevant for GL backends. + + auto *var = maybe_get<SPIRVariable>(source_id); + if (!var) + return; + + if (var->storage != StorageClassInput && var->storage != StorageClassOutput) + return; + + auto &type = get_variable_data_type(*var); + if (type.array.empty()) + return; + + auto builtin = BuiltIn(get_decoration(var->self, DecorationBuiltIn)); + bool is_builtin = is_builtin_variable(*var) && + (builtin == BuiltInPointSize || + builtin == BuiltInPosition || + builtin == BuiltInSampleMask); + bool is_tess = is_tessellation_shader(); + bool is_patch = has_decoration(var->self, DecorationPatch); + bool is_sample_mask = is_builtin && builtin == BuiltInSampleMask; + + // Tessellation input arrays are special in that they are unsized, so we cannot directly copy from it. + // We must unroll the array load. + // For builtins, we couldn't catch this case normally, + // because this is resolved in the OpAccessChain in most cases. + // If we load the entire array, we have no choice but to unroll here. + if (!is_patch && (is_builtin || is_tess)) + { + auto new_expr = join("_", target_id, "_unrolled"); + statement(variable_decl(type, new_expr, target_id), ";"); + string array_expr; + if (type.array_size_literal.back()) + { + array_expr = convert_to_string(type.array.back()); + if (type.array.back() == 0) + SPIRV_CROSS_THROW("Cannot unroll an array copy from unsized array."); + } + else + array_expr = to_expression(type.array.back()); + + // The array size might be a specialization constant, so use a for-loop instead. + statement("for (int i = 0; i < int(", array_expr, "); i++)"); + begin_scope(); + if (is_builtin && !is_sample_mask) + statement(new_expr, "[i] = gl_in[i].", expr, ";"); + else if (is_sample_mask) + { + SPIRType target_type { OpTypeInt }; + target_type.basetype = SPIRType::Int; + statement(new_expr, "[i] = ", bitcast_expression(target_type, type.basetype, join(expr, "[i]")), ";"); + } + else + statement(new_expr, "[i] = ", expr, "[i];"); + end_scope(); + + expr = std::move(new_expr); + } +} + +void CompilerGLSL::cast_from_variable_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) +{ + // We will handle array cases elsewhere. + if (!expr_type.array.empty()) + return; + + auto *var = maybe_get_backing_variable(source_id); + if (var) + source_id = var->self; + + // Only interested in standalone builtin variables. + if (!has_decoration(source_id, DecorationBuiltIn)) + { + // Except for int attributes in legacy GLSL, which are cast from float. + if (is_legacy() && expr_type.basetype == SPIRType::Int && var && var->storage == StorageClassInput) + expr = join(type_to_glsl(expr_type), "(", expr, ")"); + return; + } + + auto builtin = static_cast<BuiltIn>(get_decoration(source_id, DecorationBuiltIn)); + auto expected_type = expr_type.basetype; + + // TODO: Fill in for more builtins. + switch (builtin) + { + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInInstanceId: + case BuiltInInstanceIndex: + case BuiltInVertexId: + case BuiltInVertexIndex: + case BuiltInSampleId: + case BuiltInBaseVertex: + case BuiltInBaseInstance: + case BuiltInDrawIndex: + case BuiltInFragStencilRefEXT: + case BuiltInInstanceCustomIndexNV: + case BuiltInSampleMask: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInShadingRateKHR: + expected_type = SPIRType::Int; + break; + + case BuiltInGlobalInvocationId: + case BuiltInLocalInvocationId: + case BuiltInWorkgroupId: + case BuiltInLocalInvocationIndex: + case BuiltInWorkgroupSize: + case BuiltInNumWorkgroups: + case BuiltInIncomingRayFlagsNV: + case BuiltInLaunchIdNV: + case BuiltInLaunchSizeNV: + case BuiltInPrimitiveTriangleIndicesEXT: + case BuiltInPrimitiveLineIndicesEXT: + case BuiltInPrimitivePointIndicesEXT: + expected_type = SPIRType::UInt; + break; + + default: + break; + } + + if (expected_type != expr_type.basetype) + expr = bitcast_expression(expr_type, expected_type, expr); +} + +SPIRType::BaseType CompilerGLSL::get_builtin_basetype(BuiltIn builtin, SPIRType::BaseType default_type) +{ + // TODO: Fill in for more builtins. + switch (builtin) + { + case BuiltInLayer: + case BuiltInPrimitiveId: + case BuiltInViewportIndex: + case BuiltInFragStencilRefEXT: + case BuiltInSampleMask: + case BuiltInPrimitiveShadingRateKHR: + case BuiltInShadingRateKHR: + return SPIRType::Int; + + default: + return default_type; + } +} + +void CompilerGLSL::cast_to_variable_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) +{ + auto *var = maybe_get_backing_variable(target_id); + if (var) + target_id = var->self; + + // Only interested in standalone builtin variables. + if (!has_decoration(target_id, DecorationBuiltIn)) + return; + + auto builtin = static_cast<BuiltIn>(get_decoration(target_id, DecorationBuiltIn)); + auto expected_type = get_builtin_basetype(builtin, expr_type.basetype); + + if (expected_type != expr_type.basetype) + { + auto type = expr_type; + type.basetype = expected_type; + expr = bitcast_expression(type, expr_type.basetype, expr); + } +} + +void CompilerGLSL::convert_non_uniform_expression(string &expr, uint32_t ptr_id) +{ + if (*backend.nonuniform_qualifier == '\0') + return; + + auto *var = maybe_get_backing_variable(ptr_id); + if (!var) + return; + + if (var->storage != StorageClassUniformConstant && + var->storage != StorageClassStorageBuffer && + var->storage != StorageClassUniform) + return; + + auto &backing_type = get<SPIRType>(var->basetype); + if (backing_type.array.empty()) + return; + + // If we get here, we know we're accessing an arrayed resource which + // might require nonuniform qualifier. + + auto start_array_index = expr.find_first_of('['); + + if (start_array_index == string::npos) + return; + + // We've opened a bracket, track expressions until we can close the bracket. + // This must be our resource index. + size_t end_array_index = string::npos; + unsigned bracket_count = 1; + for (size_t index = start_array_index + 1; index < expr.size(); index++) + { + if (expr[index] == ']') + { + if (--bracket_count == 0) + { + end_array_index = index; + break; + } + } + else if (expr[index] == '[') + bracket_count++; + } + + assert(bracket_count == 0); + + // Doesn't really make sense to declare a non-arrayed image with nonuniformEXT, but there's + // nothing we can do here to express that. + if (start_array_index == string::npos || end_array_index == string::npos || end_array_index < start_array_index) + return; + + start_array_index++; + + expr = join(expr.substr(0, start_array_index), backend.nonuniform_qualifier, "(", + expr.substr(start_array_index, end_array_index - start_array_index), ")", + expr.substr(end_array_index, string::npos)); +} + +void CompilerGLSL::emit_block_hints(const SPIRBlock &block) +{ + if ((options.es && options.version < 310) || (!options.es && options.version < 140)) + return; + + switch (block.hint) + { + case SPIRBlock::HintFlatten: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_FLATTEN"); + break; + case SPIRBlock::HintDontFlatten: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_BRANCH"); + break; + case SPIRBlock::HintUnroll: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_UNROLL"); + break; + case SPIRBlock::HintDontUnroll: + require_extension_internal("GL_EXT_control_flow_attributes"); + statement("SPIRV_CROSS_LOOP"); + break; + default: + break; + } +} + +void CompilerGLSL::preserve_alias_on_reset(uint32_t id) +{ + preserved_aliases[id] = get_name(id); +} + +void CompilerGLSL::reset_name_caches() +{ + for (auto &preserved : preserved_aliases) + set_name(preserved.first, preserved.second); + + preserved_aliases.clear(); + resource_names.clear(); + block_input_names.clear(); + block_output_names.clear(); + block_ubo_names.clear(); + block_ssbo_names.clear(); + block_names.clear(); + function_overloads.clear(); +} + +void CompilerGLSL::fixup_anonymous_struct_names(std::unordered_set<uint32_t> &visited, const SPIRType &type) +{ + if (visited.count(type.self)) + return; + visited.insert(type.self); + + for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++) + { + auto &mbr_type = get<SPIRType>(type.member_types[i]); + + if (mbr_type.basetype == SPIRType::Struct) + { + // If there are multiple aliases, the output might be somewhat unpredictable, + // but the only real alternative in that case is to do nothing, which isn't any better. + // This check should be fine in practice. + if (get_name(mbr_type.self).empty() && !get_member_name(type.self, i).empty()) + { + auto anon_name = join("anon_", get_member_name(type.self, i)); + ParsedIR::sanitize_underscores(anon_name); + set_name(mbr_type.self, anon_name); + } + + fixup_anonymous_struct_names(visited, mbr_type); + } + } +} + +void CompilerGLSL::fixup_anonymous_struct_names() +{ + // HLSL codegen can often end up emitting anonymous structs inside blocks, which + // breaks GL linking since all names must match ... + // Try to emit sensible code, so attempt to find such structs and emit anon_$member. + + // Breaks exponential explosion with weird type trees. + std::unordered_set<uint32_t> visited; + + ir.for_each_typed_id<SPIRType>([&](uint32_t, SPIRType &type) { + if (type.basetype == SPIRType::Struct && + (has_decoration(type.self, DecorationBlock) || + has_decoration(type.self, DecorationBufferBlock))) + { + fixup_anonymous_struct_names(visited, type); + } + }); +} + +void CompilerGLSL::fixup_type_alias() +{ + // Due to how some backends work, the "master" type of type_alias must be a block-like type if it exists. + ir.for_each_typed_id<SPIRType>([&](uint32_t self, SPIRType &type) { + if (!type.type_alias) + return; + + if (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)) + { + // Top-level block types should never alias anything else. + type.type_alias = 0; + } + else if (type_is_block_like(type) && type.self == ID(self)) + { + // A block-like type is any type which contains Offset decoration, but not top-level blocks, + // i.e. blocks which are placed inside buffers. + // Become the master. + ir.for_each_typed_id<SPIRType>([&](uint32_t other_id, SPIRType &other_type) { + if (other_id == self) + return; + + if (other_type.type_alias == type.type_alias) + other_type.type_alias = self; + }); + + this->get<SPIRType>(type.type_alias).type_alias = self; + type.type_alias = 0; + } + }); +} + +void CompilerGLSL::reorder_type_alias() +{ + // Reorder declaration of types so that the master of the type alias is always emitted first. + // We need this in case a type B depends on type A (A must come before in the vector), but A is an alias of a type Abuffer, which + // means declaration of A doesn't happen (yet), and order would be B, ABuffer and not ABuffer, B. Fix this up here. + auto loop_lock = ir.create_loop_hard_lock(); + + auto &type_ids = ir.ids_for_type[TypeType]; + for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) + { + auto &type = get<SPIRType>(*alias_itr); + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + { + // We will skip declaring this type, so make sure the type_alias type comes before. + auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias)); + assert(master_itr != end(type_ids)); + + if (alias_itr < master_itr) + { + // Must also swap the type order for the constant-type joined array. + auto &joined_types = ir.ids_for_constant_undef_or_type; + auto alt_alias_itr = find(begin(joined_types), end(joined_types), *alias_itr); + auto alt_master_itr = find(begin(joined_types), end(joined_types), *master_itr); + assert(alt_alias_itr != end(joined_types)); + assert(alt_master_itr != end(joined_types)); + + swap(*alias_itr, *master_itr); + swap(*alt_alias_itr, *alt_master_itr); + } + } + } +} + +void CompilerGLSL::emit_line_directive(uint32_t file_id, uint32_t line_literal) +{ + // If we are redirecting statements, ignore the line directive. + // Common case here is continue blocks. + if (redirect_statement) + return; + + // If we're emitting code in a sensitive context such as condition blocks in for loops, don't emit + // any line directives, because it's not possible. + if (block_debug_directives) + return; + + if (options.emit_line_directives) + { + require_extension_internal("GL_GOOGLE_cpp_style_line_directive"); + statement_no_indent("#line ", line_literal, " \"", get<SPIRString>(file_id).str, "\""); + } +} + +void CompilerGLSL::emit_copy_logical_type(uint32_t lhs_id, uint32_t lhs_type_id, uint32_t rhs_id, uint32_t rhs_type_id, + SmallVector<uint32_t> chain) +{ + // Fully unroll all member/array indices one by one. + + auto &lhs_type = get<SPIRType>(lhs_type_id); + auto &rhs_type = get<SPIRType>(rhs_type_id); + + if (!lhs_type.array.empty()) + { + // Could use a loop here to support specialization constants, but it gets rather complicated with nested array types, + // and this is a rather obscure opcode anyways, keep it simple unless we are forced to. + uint32_t array_size = to_array_size_literal(lhs_type); + chain.push_back(0); + + for (uint32_t i = 0; i < array_size; i++) + { + chain.back() = i; + emit_copy_logical_type(lhs_id, lhs_type.parent_type, rhs_id, rhs_type.parent_type, chain); + } + } + else if (lhs_type.basetype == SPIRType::Struct) + { + chain.push_back(0); + uint32_t member_count = uint32_t(lhs_type.member_types.size()); + for (uint32_t i = 0; i < member_count; i++) + { + chain.back() = i; + emit_copy_logical_type(lhs_id, lhs_type.member_types[i], rhs_id, rhs_type.member_types[i], chain); + } + } + else + { + // Need to handle unpack/packing fixups since this can differ wildly between the logical types, + // particularly in MSL. + // To deal with this, we emit access chains and go through emit_store_statement + // to deal with all the special cases we can encounter. + + AccessChainMeta lhs_meta, rhs_meta; + auto lhs = access_chain_internal(lhs_id, chain.data(), uint32_t(chain.size()), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &lhs_meta); + auto rhs = access_chain_internal(rhs_id, chain.data(), uint32_t(chain.size()), + ACCESS_CHAIN_INDEX_IS_LITERAL_BIT, &rhs_meta); + + uint32_t id = ir.increase_bound_by(2); + lhs_id = id; + rhs_id = id + 1; + + { + auto &lhs_expr = set<SPIRExpression>(lhs_id, std::move(lhs), lhs_type_id, true); + lhs_expr.need_transpose = lhs_meta.need_transpose; + + if (lhs_meta.storage_is_packed) + set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypePacked); + if (lhs_meta.storage_physical_type != 0) + set_extended_decoration(lhs_id, SPIRVCrossDecorationPhysicalTypeID, lhs_meta.storage_physical_type); + + forwarded_temporaries.insert(lhs_id); + suppressed_usage_tracking.insert(lhs_id); + } + + { + auto &rhs_expr = set<SPIRExpression>(rhs_id, std::move(rhs), rhs_type_id, true); + rhs_expr.need_transpose = rhs_meta.need_transpose; + + if (rhs_meta.storage_is_packed) + set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypePacked); + if (rhs_meta.storage_physical_type != 0) + set_extended_decoration(rhs_id, SPIRVCrossDecorationPhysicalTypeID, rhs_meta.storage_physical_type); + + forwarded_temporaries.insert(rhs_id); + suppressed_usage_tracking.insert(rhs_id); + } + + emit_store_statement(lhs_id, rhs_id); + } +} + +bool CompilerGLSL::subpass_input_is_framebuffer_fetch(uint32_t id) const +{ + if (!has_decoration(id, DecorationInputAttachmentIndex)) + return false; + + uint32_t input_attachment_index = get_decoration(id, DecorationInputAttachmentIndex); + for (auto &remap : subpass_to_framebuffer_fetch_attachment) + if (remap.first == input_attachment_index) + return true; + + return false; +} + +const SPIRVariable *CompilerGLSL::find_subpass_input_by_attachment_index(uint32_t index) const +{ + const SPIRVariable *ret = nullptr; + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { + if (has_decoration(var.self, DecorationInputAttachmentIndex) && + get_decoration(var.self, DecorationInputAttachmentIndex) == index) + { + ret = &var; + } + }); + return ret; +} + +const SPIRVariable *CompilerGLSL::find_color_output_by_location(uint32_t location) const +{ + const SPIRVariable *ret = nullptr; + ir.for_each_typed_id<SPIRVariable>([&](uint32_t, const SPIRVariable &var) { + if (var.storage == StorageClassOutput && get_decoration(var.self, DecorationLocation) == location) + ret = &var; + }); + return ret; +} + +void CompilerGLSL::emit_inout_fragment_outputs_copy_to_subpass_inputs() +{ + for (auto &remap : subpass_to_framebuffer_fetch_attachment) + { + auto *subpass_var = find_subpass_input_by_attachment_index(remap.first); + auto *output_var = find_color_output_by_location(remap.second); + if (!subpass_var) + continue; + if (!output_var) + SPIRV_CROSS_THROW("Need to declare the corresponding fragment output variable to be able " + "to read from it."); + if (is_array(get<SPIRType>(output_var->basetype))) + SPIRV_CROSS_THROW("Cannot use GL_EXT_shader_framebuffer_fetch with arrays of color outputs."); + + auto &func = get<SPIRFunction>(get_entry_point().self); + func.fixup_hooks_in.push_back([=]() { + if (is_legacy()) + { + statement(to_expression(subpass_var->self), " = ", "gl_LastFragData[", + get_decoration(output_var->self, DecorationLocation), "];"); + } + else + { + uint32_t num_rt_components = this->get<SPIRType>(output_var->basetype).vecsize; + statement(to_expression(subpass_var->self), vector_swizzle(num_rt_components, 0), " = ", + to_expression(output_var->self), ";"); + } + }); + } +} + +bool CompilerGLSL::variable_is_depth_or_compare(VariableID id) const +{ + return is_depth_image(get<SPIRType>(get<SPIRVariable>(id).basetype), id); +} + +const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extension_name(Candidate c) +{ + static const char *const retval[CandidateCount] = { "GL_KHR_shader_subgroup_ballot", + "GL_KHR_shader_subgroup_basic", + "GL_KHR_shader_subgroup_vote", + "GL_KHR_shader_subgroup_arithmetic", + "GL_NV_gpu_shader_5", + "GL_NV_shader_thread_group", + "GL_NV_shader_thread_shuffle", + "GL_ARB_shader_ballot", + "GL_ARB_shader_group_vote", + "GL_AMD_gcn_shader" }; + return retval[c]; +} + +SmallVector<std::string> CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_names(Candidate c) +{ + switch (c) + { + case ARB_shader_ballot: + return { "GL_ARB_shader_int64" }; + case AMD_gcn_shader: + return { "GL_AMD_gpu_shader_int64", "GL_NV_gpu_shader5" }; + default: + return {}; + } +} + +const char *CompilerGLSL::ShaderSubgroupSupportHelper::get_extra_required_extension_predicate(Candidate c) +{ + switch (c) + { + case ARB_shader_ballot: + return "defined(GL_ARB_shader_int64)"; + case AMD_gcn_shader: + return "(defined(GL_AMD_gpu_shader_int64) || defined(GL_NV_gpu_shader5))"; + default: + return ""; + } +} + +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_feature_dependencies(Feature feature) +{ + switch (feature) + { + case SubgroupAllEqualT: + return { SubgroupBroadcast_First, SubgroupAll_Any_AllEqualBool }; + case SubgroupElect: + return { SubgroupBallotFindLSB_MSB, SubgroupBallot, SubgroupInvocationID }; + case SubgroupInverseBallot_InclBitCount_ExclBitCout: + return { SubgroupMask }; + case SubgroupBallotBitCount: + return { SubgroupBallot }; + case SubgroupArithmeticIAddReduce: + case SubgroupArithmeticIAddInclusiveScan: + case SubgroupArithmeticFAddReduce: + case SubgroupArithmeticFAddInclusiveScan: + case SubgroupArithmeticIMulReduce: + case SubgroupArithmeticIMulInclusiveScan: + case SubgroupArithmeticFMulReduce: + case SubgroupArithmeticFMulInclusiveScan: + return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, SubgroupMask, SubgroupBallotBitExtract }; + case SubgroupArithmeticIAddExclusiveScan: + case SubgroupArithmeticFAddExclusiveScan: + case SubgroupArithmeticIMulExclusiveScan: + case SubgroupArithmeticFMulExclusiveScan: + return { SubgroupSize, SubgroupBallot, SubgroupBallotBitCount, + SubgroupMask, SubgroupElect, SubgroupBallotBitExtract }; + default: + return {}; + } +} + +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper:: + get_feature_dependency_mask(Feature feature) +{ + return build_mask(get_feature_dependencies(feature)); +} + +bool CompilerGLSL::ShaderSubgroupSupportHelper::can_feature_be_implemented_without_extensions(Feature feature) +{ + static const bool retval[FeatureCount] = { + false, false, false, false, false, false, + true, // SubgroupBalloFindLSB_MSB + false, false, false, false, + true, // SubgroupMemBarrier - replaced with workgroup memory barriers + false, false, true, false, + false, false, false, false, false, false, // iadd, fadd + false, false, false, false, false, false, // imul , fmul + }; + + return retval[feature]; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::Candidate CompilerGLSL::ShaderSubgroupSupportHelper:: + get_KHR_extension_for_feature(Feature feature) +{ + static const Candidate extensions[FeatureCount] = { + KHR_shader_subgroup_ballot, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, + KHR_shader_subgroup_basic, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_vote, + KHR_shader_subgroup_vote, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, KHR_shader_subgroup_basic, + KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, KHR_shader_subgroup_ballot, + KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, + KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, + KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, + KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, KHR_shader_subgroup_arithmetic, + }; + + return extensions[feature]; +} + +void CompilerGLSL::ShaderSubgroupSupportHelper::request_feature(Feature feature) +{ + feature_mask |= (FeatureMask(1) << feature) | get_feature_dependency_mask(feature); +} + +bool CompilerGLSL::ShaderSubgroupSupportHelper::is_feature_requested(Feature feature) const +{ + return (feature_mask & (1u << feature)) != 0; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::Result CompilerGLSL::ShaderSubgroupSupportHelper::resolve() const +{ + Result res; + + for (uint32_t i = 0u; i < FeatureCount; ++i) + { + if (feature_mask & (1u << i)) + { + auto feature = static_cast<Feature>(i); + std::unordered_set<uint32_t> unique_candidates; + + auto candidates = get_candidates_for_feature(feature); + unique_candidates.insert(candidates.begin(), candidates.end()); + + auto deps = get_feature_dependencies(feature); + for (Feature d : deps) + { + candidates = get_candidates_for_feature(d); + if (!candidates.empty()) + unique_candidates.insert(candidates.begin(), candidates.end()); + } + + for (uint32_t c : unique_candidates) + ++res.weights[static_cast<Candidate>(c)]; + } + } + + return res; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_candidates_for_feature(Feature ft, const Result &r) +{ + auto c = get_candidates_for_feature(ft); + auto cmp = [&r](Candidate a, Candidate b) { + if (r.weights[a] == r.weights[b]) + return a < b; // Prefer candidates with lower enum value + return r.weights[a] > r.weights[b]; + }; + std::sort(c.begin(), c.end(), cmp); + return c; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::CandidateVector CompilerGLSL::ShaderSubgroupSupportHelper:: + get_candidates_for_feature(Feature feature) +{ + switch (feature) + { + case SubgroupMask: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupSize: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, AMD_gcn_shader, ARB_shader_ballot }; + case SubgroupInvocationID: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupID: + return { KHR_shader_subgroup_basic, NV_shader_thread_group }; + case NumSubgroups: + return { KHR_shader_subgroup_basic, NV_shader_thread_group }; + case SubgroupBroadcast_First: + return { KHR_shader_subgroup_ballot, NV_shader_thread_shuffle, ARB_shader_ballot }; + case SubgroupBallotFindLSB_MSB: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group }; + case SubgroupAll_Any_AllEqualBool: + return { KHR_shader_subgroup_vote, NV_gpu_shader_5, ARB_shader_group_vote, AMD_gcn_shader }; + case SubgroupAllEqualT: + return {}; // depends on other features only + case SubgroupElect: + return {}; // depends on other features only + case SubgroupBallot: + return { KHR_shader_subgroup_ballot, NV_shader_thread_group, ARB_shader_ballot }; + case SubgroupBarrier: + return { KHR_shader_subgroup_basic, NV_shader_thread_group, ARB_shader_ballot, AMD_gcn_shader }; + case SubgroupMemBarrier: + return { KHR_shader_subgroup_basic }; + case SubgroupInverseBallot_InclBitCount_ExclBitCout: + return {}; + case SubgroupBallotBitExtract: + return { NV_shader_thread_group }; + case SubgroupBallotBitCount: + return {}; + case SubgroupArithmeticIAddReduce: + case SubgroupArithmeticIAddExclusiveScan: + case SubgroupArithmeticIAddInclusiveScan: + case SubgroupArithmeticFAddReduce: + case SubgroupArithmeticFAddExclusiveScan: + case SubgroupArithmeticFAddInclusiveScan: + case SubgroupArithmeticIMulReduce: + case SubgroupArithmeticIMulExclusiveScan: + case SubgroupArithmeticIMulInclusiveScan: + case SubgroupArithmeticFMulReduce: + case SubgroupArithmeticFMulExclusiveScan: + case SubgroupArithmeticFMulInclusiveScan: + return { KHR_shader_subgroup_arithmetic, NV_shader_thread_shuffle }; + default: + return {}; + } +} + +CompilerGLSL::ShaderSubgroupSupportHelper::FeatureMask CompilerGLSL::ShaderSubgroupSupportHelper::build_mask( + const SmallVector<Feature> &features) +{ + FeatureMask mask = 0; + for (Feature f : features) + mask |= FeatureMask(1) << f; + return mask; +} + +CompilerGLSL::ShaderSubgroupSupportHelper::Result::Result() +{ + for (auto &weight : weights) + weight = 0; + + // Make sure KHR_shader_subgroup extensions are always prefered. + const uint32_t big_num = FeatureCount; + weights[KHR_shader_subgroup_ballot] = big_num; + weights[KHR_shader_subgroup_basic] = big_num; + weights[KHR_shader_subgroup_vote] = big_num; + weights[KHR_shader_subgroup_arithmetic] = big_num; +} + +void CompilerGLSL::request_workaround_wrapper_overload(TypeID id) +{ + // Must be ordered to maintain deterministic output, so vector is appropriate. + if (find(begin(workaround_ubo_load_overload_types), end(workaround_ubo_load_overload_types), id) == + end(workaround_ubo_load_overload_types)) + { + force_recompile(); + workaround_ubo_load_overload_types.push_back(id); + } +} + +void CompilerGLSL::rewrite_load_for_wrapped_row_major(std::string &expr, TypeID loaded_type, ID ptr) +{ + // Loading row-major matrices from UBOs on older AMD Windows OpenGL drivers is problematic. + // To load these types correctly, we must first wrap them in a dummy function which only purpose is to + // ensure row_major decoration is actually respected. + auto *var = maybe_get_backing_variable(ptr); + if (!var) + return; + + auto &backing_type = get<SPIRType>(var->basetype); + bool is_ubo = backing_type.basetype == SPIRType::Struct && backing_type.storage == StorageClassUniform && + has_decoration(backing_type.self, DecorationBlock); + if (!is_ubo) + return; + + auto *type = &get<SPIRType>(loaded_type); + bool rewrite = false; + bool relaxed = options.es; + + if (is_matrix(*type)) + { + // To avoid adding a lot of unnecessary meta tracking to forward the row_major state, + // we will simply look at the base struct itself. It is exceptionally rare to mix and match row-major/col-major state. + // If there is any row-major action going on, we apply the workaround. + // It is harmless to apply the workaround to column-major matrices, so this is still a valid solution. + // If an access chain occurred, the workaround is not required, so loading vectors or scalars don't need workaround. + type = &backing_type; + } + else + { + // If we're loading a composite, we don't have overloads like these. + relaxed = false; + } + + if (type->basetype == SPIRType::Struct) + { + // If we're loading a struct where any member is a row-major matrix, apply the workaround. + for (uint32_t i = 0; i < uint32_t(type->member_types.size()); i++) + { + auto decorations = combined_decoration_for_member(*type, i); + if (decorations.get(DecorationRowMajor)) + rewrite = true; + + // Since we decide on a per-struct basis, only use mediump wrapper if all candidates are mediump. + if (!decorations.get(DecorationRelaxedPrecision)) + relaxed = false; + } + } + + if (rewrite) + { + request_workaround_wrapper_overload(loaded_type); + expr = join("spvWorkaroundRowMajor", (relaxed ? "MP" : ""), "(", expr, ")"); + } +} + +void CompilerGLSL::mask_stage_output_by_location(uint32_t location, uint32_t component) +{ + masked_output_locations.insert({ location, component }); +} + +void CompilerGLSL::mask_stage_output_by_builtin(BuiltIn builtin) +{ + masked_output_builtins.insert(builtin); +} + +bool CompilerGLSL::is_stage_output_variable_masked(const SPIRVariable &var) const +{ + auto &type = get<SPIRType>(var.basetype); + bool is_block = has_decoration(type.self, DecorationBlock); + // Blocks by themselves are never masked. Must be masked per-member. + if (is_block) + return false; + + bool is_builtin = has_decoration(var.self, DecorationBuiltIn); + + if (is_builtin) + { + return is_stage_output_builtin_masked(BuiltIn(get_decoration(var.self, DecorationBuiltIn))); + } + else + { + if (!has_decoration(var.self, DecorationLocation)) + return false; + + return is_stage_output_location_masked( + get_decoration(var.self, DecorationLocation), + get_decoration(var.self, DecorationComponent)); + } +} + +bool CompilerGLSL::is_stage_output_block_member_masked(const SPIRVariable &var, uint32_t index, bool strip_array) const +{ + auto &type = get<SPIRType>(var.basetype); + bool is_block = has_decoration(type.self, DecorationBlock); + if (!is_block) + return false; + + BuiltIn builtin = BuiltInMax; + if (is_member_builtin(type, index, &builtin)) + { + return is_stage_output_builtin_masked(builtin); + } + else + { + uint32_t location = get_declared_member_location(var, index, strip_array); + uint32_t component = get_member_decoration(type.self, index, DecorationComponent); + return is_stage_output_location_masked(location, component); + } +} + +bool CompilerGLSL::is_per_primitive_variable(const SPIRVariable &var) const +{ + if (has_decoration(var.self, DecorationPerPrimitiveEXT)) + return true; + + auto &type = get<SPIRType>(var.basetype); + if (!has_decoration(type.self, DecorationBlock)) + return false; + + for (uint32_t i = 0, n = uint32_t(type.member_types.size()); i < n; i++) + if (!has_member_decoration(type.self, i, DecorationPerPrimitiveEXT)) + return false; + + return true; +} + +bool CompilerGLSL::is_stage_output_location_masked(uint32_t location, uint32_t component) const +{ + return masked_output_locations.count({ location, component }) != 0; +} + +bool CompilerGLSL::is_stage_output_builtin_masked(spv::BuiltIn builtin) const +{ + return masked_output_builtins.count(builtin) != 0; +} + +uint32_t CompilerGLSL::get_declared_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const +{ + auto &block_type = get<SPIRType>(var.basetype); + if (has_member_decoration(block_type.self, mbr_idx, DecorationLocation)) + return get_member_decoration(block_type.self, mbr_idx, DecorationLocation); + else + return get_accumulated_member_location(var, mbr_idx, strip_array); +} + +uint32_t CompilerGLSL::get_accumulated_member_location(const SPIRVariable &var, uint32_t mbr_idx, bool strip_array) const +{ + auto &type = strip_array ? get_variable_element_type(var) : get_variable_data_type(var); + uint32_t location = get_decoration(var.self, DecorationLocation); + + for (uint32_t i = 0; i < mbr_idx; i++) + { + auto &mbr_type = get<SPIRType>(type.member_types[i]); + + // Start counting from any place we have a new location decoration. + if (has_member_decoration(type.self, mbr_idx, DecorationLocation)) + location = get_member_decoration(type.self, mbr_idx, DecorationLocation); + + uint32_t location_count = type_to_location_count(mbr_type); + location += location_count; + } + + return location; +} + +StorageClass CompilerGLSL::get_expression_effective_storage_class(uint32_t ptr) +{ + auto *var = maybe_get_backing_variable(ptr); + + // If the expression has been lowered to a temporary, we need to use the Generic storage class. + // We're looking for the effective storage class of a given expression. + // An access chain or forwarded OpLoads from such access chains + // will generally have the storage class of the underlying variable, but if the load was not forwarded + // we have lost any address space qualifiers. + bool forced_temporary = ir.ids[ptr].get_type() == TypeExpression && !get<SPIRExpression>(ptr).access_chain && + (forced_temporaries.count(ptr) != 0 || forwarded_temporaries.count(ptr) == 0); + + if (var && !forced_temporary) + { + if (variable_decl_is_remapped_storage(*var, StorageClassWorkgroup)) + return StorageClassWorkgroup; + if (variable_decl_is_remapped_storage(*var, StorageClassStorageBuffer)) + return StorageClassStorageBuffer; + + // Normalize SSBOs to StorageBuffer here. + if (var->storage == StorageClassUniform && + has_decoration(get<SPIRType>(var->basetype).self, DecorationBufferBlock)) + return StorageClassStorageBuffer; + else + return var->storage; + } + else + return expression_type(ptr).storage; +} + +uint32_t CompilerGLSL::type_to_location_count(const SPIRType &type) const +{ + uint32_t count; + if (type.basetype == SPIRType::Struct) + { + uint32_t mbr_count = uint32_t(type.member_types.size()); + count = 0; + for (uint32_t i = 0; i < mbr_count; i++) + count += type_to_location_count(get<SPIRType>(type.member_types[i])); + } + else + { + count = type.columns > 1 ? type.columns : 1; + } + + uint32_t dim_count = uint32_t(type.array.size()); + for (uint32_t i = 0; i < dim_count; i++) + count *= to_array_size_literal(type, i); + + return count; +} + +std::string CompilerGLSL::format_float(float value) const +{ + if (float_formatter) + return float_formatter->format_float(value); + + // default behavior + return convert_to_string(value, current_locale_radix_character); +} + +std::string CompilerGLSL::format_double(double value) const +{ + if (float_formatter) + return float_formatter->format_double(value); + + // default behavior + return convert_to_string(value, current_locale_radix_character); +} + |