diff options
Diffstat (limited to 'drivers')
22 files changed, 541 insertions, 207 deletions
diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index 2894f4164f..2db17e96f7 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -866,7 +866,7 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_rend state.instance_data_array[r_index].lights[2] = lights[2]; state.instance_data_array[r_index].lights[3] = lights[3]; - state.instance_data_array[r_index].flags = base_flags | (state.instance_data_array[r_index == 0 ? 0 : r_index - 1].flags & (FLAGS_DEFAULT_NORMAL_MAP_USED | FLAGS_DEFAULT_SPECULAR_MAP_USED)); //reset on each command for sanity, keep canvastexture binding config + state.instance_data_array[r_index].flags = base_flags | (state.instance_data_array[r_index == 0 ? 0 : r_index - 1].flags & (FLAGS_DEFAULT_NORMAL_MAP_USED | FLAGS_DEFAULT_SPECULAR_MAP_USED)); // Reset on each command for safety, keep canvastexture binding config. Color blend_color = base_color; GLES3::CanvasShaderData::BlendMode blend_mode = p_blend_mode; @@ -1236,7 +1236,7 @@ void RasterizerCanvasGLES3::_record_item_commands(const Item *p_item, RID p_rend } void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { - ERR_FAIL_COND(!state.canvas_instance_batches[state.current_batch_index].command); + ERR_FAIL_NULL(state.canvas_instance_batches[state.current_batch_index].command); // Used by Polygon and Mesh. static const GLenum prim[5] = { GL_POINTS, GL_LINES, GL_LINE_STRIP, GL_TRIANGLES, GL_TRIANGLE_STRIP }; @@ -1383,7 +1383,7 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { GLuint vertex_array_gl = 0; GLuint index_array_gl = 0; - uint32_t input_mask = 0; // 2D meshes always use the same vertex format + uint64_t input_mask = 0; // 2D meshes always use the same vertex format. if (mesh_instance.is_valid()) { mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(mesh_instance, j, input_mask, vertex_array_gl); } else { @@ -1420,6 +1420,13 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index) { glEnableVertexAttribArray(5); glVertexAttribIPointer(5, 4, GL_UNSIGNED_INT, instance_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(instance_color_offset * sizeof(float))); glVertexAttribDivisor(5, 1); + } else { + // Set all default instance color and custom data values to 1.0 or 0.0 using a compressed format. + uint16_t zero = Math::make_half_float(0.0f); + uint16_t one = Math::make_half_float(1.0f); + GLuint default_color = (uint32_t(one) << 16) | one; + GLuint default_custom = (uint32_t(zero) << 16) | zero; + glVertexAttribI4ui(5, default_color, default_color, default_custom, default_custom); } } @@ -2150,7 +2157,7 @@ void RasterizerCanvasGLES3::_bind_canvas_texture(RID p_texture, RS::CanvasItemTe GLES3::Texture *t = texture_storage->get_texture(p_texture); if (t) { - ERR_FAIL_COND(!t->canvas_texture); + ERR_FAIL_NULL(t->canvas_texture); ct = t->canvas_texture; if (t->render_target) { t->render_target->used_in_frame = true; diff --git a/drivers/gles3/rasterizer_canvas_gles3.h b/drivers/gles3/rasterizer_canvas_gles3.h index c1b3e20e33..94c771cde7 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.h +++ b/drivers/gles3/rasterizer_canvas_gles3.h @@ -367,6 +367,12 @@ public: void set_time(double p_time); + virtual void set_debug_redraw(bool p_enabled, double p_time, const Color &p_color) override { + if (p_enabled) { + WARN_PRINT_ONCE("Debug CanvasItem Redraw is not available yet when using the GL Compatibility backend."); + } + } + static RasterizerCanvasGLES3 *get_singleton(); RasterizerCanvasGLES3(); ~RasterizerCanvasGLES3(); diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index 1f8e9180e3..fc08f1cf38 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -209,7 +209,7 @@ void RasterizerSceneGLES3::_geometry_instance_add_surface_with_material(Geometry GLES3::SceneMaterialData *material_shadow = nullptr; void *surface_shadow = nullptr; - if (!p_material->shader_data->uses_particle_trails && !p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_prepass_alpha && !p_material->shader_data->uses_alpha_clip) { + if (!p_material->shader_data->uses_particle_trails && !p_material->shader_data->writes_modelview_or_projection && !p_material->shader_data->uses_vertex && !p_material->shader_data->uses_discard && !p_material->shader_data->uses_depth_prepass_alpha && !p_material->shader_data->uses_alpha_clip && !p_material->shader_data->uses_world_coordinates) { flags |= GeometryInstanceSurface::FLAG_USES_SHARED_SHADOW_MATERIAL; material_shadow = static_cast<GLES3::SceneMaterialData *>(GLES3::MaterialStorage::get_singleton()->material_get_data(scene_globals.default_material, RS::SHADER_SPATIAL)); @@ -2908,6 +2908,18 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, } material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::WORLD_TRANSFORM, world_transform, shader->version, instance_variant, spec_constants); + { + GLES3::Mesh::Surface *s = reinterpret_cast<GLES3::Mesh::Surface *>(surf->surface); + if (s->format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES) { + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::COMPRESSED_AABB_POSITION, s->aabb.position, shader->version, instance_variant, spec_constants); + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::COMPRESSED_AABB_SIZE, s->aabb.size, shader->version, instance_variant, spec_constants); + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::UV_SCALE, s->uv_scale, shader->version, instance_variant, spec_constants); + } else { + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::COMPRESSED_AABB_POSITION, Vector3(0.0, 0.0, 0.0), shader->version, instance_variant, spec_constants); + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::COMPRESSED_AABB_SIZE, Vector3(1.0, 1.0, 1.0), shader->version, instance_variant, spec_constants); + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::UV_SCALE, Vector4(0.0, 0.0, 0.0, 0.0), shader->version, instance_variant, spec_constants); + } + } // Can be index count or vertex count uint32_t count = 0; @@ -2962,7 +2974,15 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, glEnableVertexAttribArray(15); glVertexAttribIPointer(15, 4, GL_UNSIGNED_INT, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(color_custom_offset * sizeof(float))); glVertexAttribDivisor(15, 1); + } else { + // Set all default instance color and custom data values to 1.0 or 0.0 using a compressed format. + uint16_t zero = Math::make_half_float(0.0f); + uint16_t one = Math::make_half_float(1.0f); + GLuint default_color = (uint32_t(one) << 16) | one; + GLuint default_custom = (uint32_t(zero) << 16) | zero; + glVertexAttribI4ui(15, default_color, default_color, default_custom, default_custom); } + if (use_index_buffer) { glDrawElementsInstanced(primitive_gl, count, mesh_storage->mesh_surface_get_index_type(mesh_surface), 0, inst->instance_count); } else { diff --git a/drivers/gles3/shaders/particles.glsl b/drivers/gles3/shaders/particles.glsl index 40881a1808..64ef26b075 100644 --- a/drivers/gles3/shaders/particles.glsl +++ b/drivers/gles3/shaders/particles.glsl @@ -78,7 +78,7 @@ layout(std140) uniform FrameData { //ubo:0 float delta; float particle_size; - float pad0; + float amount_ratio; float pad1; float pad2; @@ -89,6 +89,9 @@ layout(std140) uniform FrameData { //ubo:0 mat4 emission_transform; + vec3 emitter_velocity; + float interp_to_end; + Attractor attractors[MAX_ATTRACTORS]; Collider colliders[MAX_COLLIDERS]; }; diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index 0c1a20caed..e0f8e83373 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -52,8 +52,8 @@ ADDITIVE_SPOT = false /* from RenderingServer: -ARRAY_VERTEX = 0, // RG32F or RGB32F (depending on 2D bit) -ARRAY_NORMAL = 1, // RG16 octahedral compression +ARRAY_VERTEX = 0, // RGB32F or RGBA16 +ARRAY_NORMAL = 1, // RG16 octahedral compression or RGBA16 normal + angle ARRAY_TANGENT = 2, // RG16 octahedral compression, sign stored in sign of G ARRAY_COLOR = 3, // RGBA8 ARRAY_TEX_UV = 4, // RG32F @@ -68,16 +68,16 @@ ARRAY_WEIGHTS = 11, // RGBA16UNORM (x2 if 8 weights) /* INPUT ATTRIBS */ -layout(location = 0) in highp vec3 vertex_attrib; +// Always contains vertex position in XYZ, can contain tangent angle in W. +layout(location = 0) in highp vec4 vertex_angle_attrib; /* clang-format on */ #ifdef NORMAL_USED -layout(location = 1) in vec2 normal_attrib; +// Contains Normal/Axis in RG, can contain tangent in BA. +layout(location = 1) in vec4 axis_tangent_attrib; #endif -#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) -layout(location = 2) in vec2 tangent_attrib; -#endif +// location 2 is unused. #if defined(COLOR_USED) layout(location = 3) in vec4 color_attrib; @@ -122,6 +122,16 @@ vec3 oct_to_vec3(vec2 e) { return normalize(v); } +void axis_angle_to_tbn(vec3 axis, float angle, out vec3 tangent, out vec3 binormal, out vec3 normal) { + float c = cos(angle); + float s = sin(angle); + vec3 omc_axis = (1.0 - c) * axis; + vec3 s_axis = s * axis; + tangent = omc_axis.xxx * axis + vec3(c, -s_axis.z, s_axis.y); + binormal = omc_axis.yyy * axis + vec3(s_axis.z, c, -s_axis.x); + normal = omc_axis.zzz * axis + vec3(-s_axis.y, s_axis.x, c); +} + #ifdef USE_INSTANCING layout(location = 12) in highp vec4 instance_xform0; layout(location = 13) in highp vec4 instance_xform1; @@ -228,10 +238,9 @@ multiview_data; #endif uniform highp mat4 world_transform; - -#ifdef USE_LIGHTMAP -uniform highp vec4 lightmap_uv_rect; -#endif +uniform highp vec3 compressed_aabb_position; +uniform highp vec3 compressed_aabb_size; +uniform highp vec4 uv_scale; /* Varyings */ @@ -248,13 +257,9 @@ out vec4 color_interp; out vec2 uv_interp; #endif -#if defined(UV2_USED) -out vec2 uv2_interp; -#else -#ifdef USE_LIGHTMAP +#if defined(UV2_USED) || defined(USE_LIGHTMAP) out vec2 uv2_interp; #endif -#endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) out vec3 tangent_interp; @@ -294,7 +299,7 @@ layout(std140) uniform MaterialUniforms { // ubo:3 invariant gl_Position; void main() { - highp vec3 vertex = vertex_attrib; + highp vec3 vertex = vertex_angle_attrib.xyz * compressed_aabb_size + compressed_aabb_position; highp mat4 model_matrix = world_transform; #ifdef USE_INSTANCING @@ -303,15 +308,30 @@ void main() { #endif #ifdef NORMAL_USED - vec3 normal = oct_to_vec3(normal_attrib * 2.0 - 1.0); + vec3 normal = oct_to_vec3(axis_tangent_attrib.xy * 2.0 - 1.0); #endif highp mat3 model_normal_matrix = mat3(model_matrix); -#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) - vec2 signed_tangent_attrib = tangent_attrib * 2.0 - 1.0; - vec3 tangent = oct_to_vec3(vec2(signed_tangent_attrib.x, abs(signed_tangent_attrib.y) * 2.0 - 1.0)); - float binormalf = sign(signed_tangent_attrib.y); - vec3 binormal = normalize(cross(normal, tangent) * binormalf); +#if defined(NORMAL_USED) || defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) + + vec3 binormal; + float binormal_sign; + vec3 tangent; + if (axis_tangent_attrib.z > 0.0 || axis_tangent_attrib.w < 1.0) { + // Uncompressed format. + vec2 signed_tangent_attrib = axis_tangent_attrib.zw * 2.0 - 1.0; + tangent = oct_to_vec3(vec2(signed_tangent_attrib.x, abs(signed_tangent_attrib.y) * 2.0 - 1.0)); + binormal_sign = sign(signed_tangent_attrib.y); + binormal = normalize(cross(normal, tangent) * binormal_sign); + } else { + // Compressed format. + float angle = vertex_angle_attrib.w; + binormal_sign = angle > 0.5 ? 1.0 : -1.0; // 0.5 does not exist in UNORM16, so values are either greater or smaller. + angle = abs(angle * 2.0 - 1.0) * M_PI; // 0.5 is basically zero, allowing to encode both signs reliably. + vec3 axis = normal; + axis_angle_to_tbn(axis, angle, tangent, binormal, normal); + binormal *= binormal_sign; + } #endif #if defined(COLOR_USED) @@ -326,13 +346,18 @@ void main() { uv_interp = uv_attrib; #endif -#ifdef USE_LIGHTMAP - uv2_interp = lightmap_uv_rect.zw * uv2_attrib + lightmap_uv_rect.xy; -#else -#if defined(UV2_USED) +#if defined(UV2_USED) || defined(USE_LIGHTMAP) uv2_interp = uv2_attrib; #endif + + if (uv_scale != vec4(0.0)) { // Compression enabled +#ifdef UV_USED + uv_interp = (uv_interp - 0.5) * uv_scale.xy; +#endif +#if defined(UV2_USED) || defined(USE_LIGHTMAP) + uv2_interp = (uv2_interp - 0.5) * uv_scale.zw; #endif + } #if defined(OVERRIDE_POSITION) highp vec4 position; @@ -392,13 +417,12 @@ void main() { normal = modelview_normal * normal; #endif -#endif - #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) binormal = modelview_normal * binormal; tangent = modelview_normal * tangent; #endif +#endif // !defined(SKIP_TRANSFORM_USED) && !defined(VERTEX_WORLD_COORDS_USED) // Using world coordinates #if !defined(SKIP_TRANSFORM_USED) && defined(VERTEX_WORLD_COORDS_USED) @@ -1591,7 +1615,6 @@ void main() { float directional_shadow = 1.0; if (depth_z < light_split_offsets.y) { - float pssm_fade = 0.0; #ifdef LIGHT_USE_PSSM_BLEND float directional_shadow2 = 1.0; @@ -1599,7 +1622,6 @@ void main() { bool use_blend = true; #endif if (depth_z < light_split_offsets.x) { - float pssm_fade = 0.0; directional_shadow = shadow1; #ifdef LIGHT_USE_PSSM_BLEND @@ -1608,7 +1630,6 @@ void main() { #endif } else { directional_shadow = shadow2; - pssm_fade = smoothstep(light_split_offsets.x, light_split_offsets.y, depth_z); #ifdef LIGHT_USE_PSSM_BLEND use_blend = false; #endif @@ -1618,7 +1639,6 @@ void main() { directional_shadow = mix(directional_shadow, directional_shadow2, pssm_blend); } #endif - directional_shadow = mix(directional_shadow, 1.0, pssm_fade); } #endif //LIGHT_USE_PSSM2 @@ -1634,7 +1654,6 @@ void main() { float directional_shadow = 1.0; if (depth_z < light_split_offsets.w) { - float pssm_fade = 0.0; #ifdef LIGHT_USE_PSSM_BLEND float directional_shadow2 = 1.0; @@ -1670,7 +1689,6 @@ void main() { } else { directional_shadow = shadow4; - pssm_fade = smoothstep(light_split_offsets.z, light_split_offsets.w, depth_z); #if defined(LIGHT_USE_PSSM_BLEND) use_blend = false; @@ -1682,7 +1700,6 @@ void main() { directional_shadow = mix(directional_shadow, directional_shadow2, pssm_blend); } #endif - directional_shadow = mix(directional_shadow, 1.0, pssm_fade); } #endif //LIGHT_USE_PSSM4 diff --git a/drivers/gles3/storage/light_storage.cpp b/drivers/gles3/storage/light_storage.cpp index ff06fbfa41..6d4d23bd10 100644 --- a/drivers/gles3/storage/light_storage.cpp +++ b/drivers/gles3/storage/light_storage.cpp @@ -34,6 +34,7 @@ #include "../rasterizer_gles3.h" #include "../rasterizer_scene_gles3.h" #include "config.h" +#include "core/config/project_settings.h" #include "texture_storage.h" using namespace GLES3; @@ -46,6 +47,11 @@ LightStorage *LightStorage::get_singleton() { LightStorage::LightStorage() { singleton = this; + + directional_shadow.size = GLOBAL_GET("rendering/lights_and_shadows/directional_shadow/size"); + directional_shadow.use_16_bits = GLOBAL_GET("rendering/lights_and_shadows/directional_shadow/16_bits"); + + // lightmap_probe_capture_update_speed = GLOBAL_GET("rendering/lightmapping/probe_capture/update_speed"); } LightStorage::~LightStorage() { diff --git a/drivers/gles3/storage/material_storage.cpp b/drivers/gles3/storage/material_storage.cpp index a594813ed0..b3a3506d40 100644 --- a/drivers/gles3/storage/material_storage.cpp +++ b/drivers/gles3/storage/material_storage.cpp @@ -1395,6 +1395,7 @@ MaterialStorage::MaterialStorage() { actions.renames["DELTA"] = "local_delta"; actions.renames["NUMBER"] = "particle_number"; actions.renames["INDEX"] = "index"; + actions.renames["AMOUNT_RATIO"] = "amount_ratio"; //actions.renames["GRAVITY"] = "current_gravity"; actions.renames["EMISSION_TRANSFORM"] = "emission_transform"; actions.renames["RANDOM_SEED"] = "random_seed"; @@ -1407,6 +1408,8 @@ MaterialStorage::MaterialStorage() { actions.renames["COLLISION_NORMAL"] = "collision_normal"; actions.renames["COLLISION_DEPTH"] = "collision_depth"; actions.renames["ATTRACTOR_FORCE"] = "attractor_force"; + actions.renames["EMITTER_VELOCITY"] = "emitter_velocity"; + actions.renames["INTERPOLATE_TO_END"] = "interp_to_end"; // These are unsupported, but may be used by users. To avoid compile time overhead, we add the stub only when used. actions.renames["FLAG_EMIT_POSITION"] = "uint(1)"; @@ -2396,7 +2399,7 @@ void MaterialStorage::material_set_shader(RID p_material, RID p_shader) { return; } - ERR_FAIL_COND(shader->data == nullptr); + ERR_FAIL_NULL(shader->data); material->data = material_data_request_func[shader->mode](shader->data); material->data->self = p_material; @@ -2893,6 +2896,7 @@ void SceneShaderData::set_code(const String &p_code) { actions.render_mode_flags["unshaded"] = &unshaded; actions.render_mode_flags["wireframe"] = &wireframe; actions.render_mode_flags["particle_trails"] = &uses_particle_trails; + actions.render_mode_flags["world_vertex_coords"] = &uses_world_coordinates; actions.usage_flag_pointers["ALPHA"] = &uses_alpha; actions.usage_flag_pointers["ALPHA_SCISSOR_THRESHOLD"] = &uses_alpha_clip; @@ -2944,7 +2948,7 @@ void SceneShaderData::set_code(const String &p_code) { cull_mode = Cull(cull_modei); blend_mode = BlendMode(blend_modei); alpha_antialiasing_mode = AlphaAntiAliasing(alpha_antialiasing_modei); - vertex_input_mask = uint32_t(uses_normal); + vertex_input_mask = uint64_t(uses_normal); vertex_input_mask |= uses_tangent << 1; vertex_input_mask |= uses_color << 2; vertex_input_mask |= uses_uv << 3; diff --git a/drivers/gles3/storage/material_storage.h b/drivers/gles3/storage/material_storage.h index 9c63c8847d..75127bb198 100644 --- a/drivers/gles3/storage/material_storage.h +++ b/drivers/gles3/storage/material_storage.h @@ -316,7 +316,7 @@ struct SceneShaderData : public ShaderData { bool uses_bones; bool uses_weights; - uint32_t vertex_input_mask = 0; + uint64_t vertex_input_mask = 0; uint64_t last_pass = 0; uint32_t index = 0; diff --git a/drivers/gles3/storage/mesh_storage.cpp b/drivers/gles3/storage/mesh_storage.cpp index cc6031fa57..ae04b63cfe 100644 --- a/drivers/gles3/storage/mesh_storage.cpp +++ b/drivers/gles3/storage/mesh_storage.cpp @@ -117,34 +117,40 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) uint32_t skin_stride = 0; for (int i = 0; i < RS::ARRAY_WEIGHTS; i++) { - if ((p_surface.format & (1 << i))) { + if ((p_surface.format & (1ULL << i))) { switch (i) { case RS::ARRAY_VERTEX: { - if (p_surface.format & RS::ARRAY_FLAG_USE_2D_VERTICES) { + if ((p_surface.format & RS::ARRAY_FLAG_USE_2D_VERTICES) || (p_surface.format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES)) { stride += sizeof(float) * 2; } else { stride += sizeof(float) * 3; } - } break; case RS::ARRAY_NORMAL: { stride += sizeof(uint16_t) * 2; } break; case RS::ARRAY_TANGENT: { - stride += sizeof(uint16_t) * 2; - + if (!(p_surface.format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES)) { + stride += sizeof(uint16_t) * 2; + } } break; case RS::ARRAY_COLOR: { attrib_stride += sizeof(uint32_t); } break; case RS::ARRAY_TEX_UV: { - attrib_stride += sizeof(float) * 2; - + if (p_surface.format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES) { + attrib_stride += sizeof(uint16_t) * 2; + } else { + attrib_stride += sizeof(float) * 2; + } } break; case RS::ARRAY_TEX_UV2: { - attrib_stride += sizeof(float) * 2; - + if (p_surface.format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES) { + attrib_stride += sizeof(uint16_t) * 2; + } else { + attrib_stride += sizeof(float) * 2; + } } break; case RS::ARRAY_CUSTOM0: case RS::ARRAY_CUSTOM1: @@ -185,92 +191,121 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) #endif + uint64_t surface_version = p_surface.format & (uint64_t(RS::ARRAY_FLAG_FORMAT_VERSION_MASK) << RS::ARRAY_FLAG_FORMAT_VERSION_SHIFT); + RS::SurfaceData new_surface = p_surface; +#ifdef DISABLE_DEPRECATED + + ERR_FAIL_COND_MSG(surface_version != RS::ARRAY_FLAG_FORMAT_CURRENT_VERSION, "Surface version provided (" + itos(int(surface_version >> RS::ARRAY_FLAG_FORMAT_VERSION_SHIFT)) + ") does not match current version (" + itos(RS::ARRAY_FLAG_FORMAT_CURRENT_VERSION >> RS::ARRAY_FLAG_FORMAT_VERSION_SHIFT) + ")"); + +#else + + if (surface_version != uint64_t(RS::ARRAY_FLAG_FORMAT_CURRENT_VERSION)) { + RS::_fix_surface_compatibility(new_surface); + surface_version = new_surface.format & (uint64_t(RS::ARRAY_FLAG_FORMAT_VERSION_MASK) << RS::ARRAY_FLAG_FORMAT_VERSION_SHIFT); + ERR_FAIL_COND_MSG(surface_version != uint64_t(RS::ARRAY_FLAG_FORMAT_CURRENT_VERSION), + "Surface version provided (" + + itos((surface_version >> RS::ARRAY_FLAG_FORMAT_VERSION_SHIFT) & RS::ARRAY_FLAG_FORMAT_VERSION_MASK) + + ") does not match current version (" + + itos((uint64_t(RS::ARRAY_FLAG_FORMAT_CURRENT_VERSION) >> RS::ARRAY_FLAG_FORMAT_VERSION_SHIFT) & RS::ARRAY_FLAG_FORMAT_VERSION_MASK) + + ")"); + } +#endif + Mesh::Surface *s = memnew(Mesh::Surface); - s->format = p_surface.format; - s->primitive = p_surface.primitive; + s->format = new_surface.format; + s->primitive = new_surface.primitive; - if (p_surface.vertex_data.size()) { + if (new_surface.vertex_data.size()) { glGenBuffers(1, &s->vertex_buffer); glBindBuffer(GL_ARRAY_BUFFER, s->vertex_buffer); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s->vertex_buffer, p_surface.vertex_data.size(), p_surface.vertex_data.ptr(), (s->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW, "Mesh vertex buffer"); - s->vertex_buffer_size = p_surface.vertex_data.size(); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s->vertex_buffer, new_surface.vertex_data.size(), new_surface.vertex_data.ptr(), (s->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW, "Mesh vertex buffer"); + s->vertex_buffer_size = new_surface.vertex_data.size(); } - if (p_surface.attribute_data.size()) { + if (new_surface.attribute_data.size()) { glGenBuffers(1, &s->attribute_buffer); glBindBuffer(GL_ARRAY_BUFFER, s->attribute_buffer); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s->attribute_buffer, p_surface.attribute_data.size(), p_surface.attribute_data.ptr(), (s->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW, "Mesh attribute buffer"); - s->attribute_buffer_size = p_surface.attribute_data.size(); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s->attribute_buffer, new_surface.attribute_data.size(), new_surface.attribute_data.ptr(), (s->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW, "Mesh attribute buffer"); + s->attribute_buffer_size = new_surface.attribute_data.size(); } - if (p_surface.skin_data.size()) { + if (new_surface.skin_data.size()) { glGenBuffers(1, &s->skin_buffer); glBindBuffer(GL_ARRAY_BUFFER, s->skin_buffer); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s->skin_buffer, p_surface.skin_data.size(), p_surface.skin_data.ptr(), (s->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW, "Mesh skin buffer"); - s->skin_buffer_size = p_surface.skin_data.size(); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s->skin_buffer, new_surface.skin_data.size(), new_surface.skin_data.ptr(), (s->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW, "Mesh skin buffer"); + s->skin_buffer_size = new_surface.skin_data.size(); } glBindBuffer(GL_ARRAY_BUFFER, 0); - s->vertex_count = p_surface.vertex_count; + s->vertex_count = new_surface.vertex_count; - if (p_surface.format & RS::ARRAY_FORMAT_BONES) { + if (new_surface.format & RS::ARRAY_FORMAT_BONES) { mesh->has_bone_weights = true; } - if (p_surface.index_count) { - bool is_index_16 = p_surface.vertex_count <= 65536 && p_surface.vertex_count > 0; + if (new_surface.index_count) { + bool is_index_16 = new_surface.vertex_count <= 65536 && new_surface.vertex_count > 0; glGenBuffers(1, &s->index_buffer); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, s->index_buffer); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ELEMENT_ARRAY_BUFFER, s->index_buffer, p_surface.index_data.size(), p_surface.index_data.ptr(), GL_STATIC_DRAW, "Mesh index buffer"); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ELEMENT_ARRAY_BUFFER, s->index_buffer, new_surface.index_data.size(), new_surface.index_data.ptr(), GL_STATIC_DRAW, "Mesh index buffer"); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); //unbind - s->index_count = p_surface.index_count; - s->index_buffer_size = p_surface.index_data.size(); + s->index_count = new_surface.index_count; + s->index_buffer_size = new_surface.index_data.size(); - if (p_surface.lods.size()) { - s->lods = memnew_arr(Mesh::Surface::LOD, p_surface.lods.size()); - s->lod_count = p_surface.lods.size(); + if (new_surface.lods.size()) { + s->lods = memnew_arr(Mesh::Surface::LOD, new_surface.lods.size()); + s->lod_count = new_surface.lods.size(); - for (int i = 0; i < p_surface.lods.size(); i++) { + for (int i = 0; i < new_surface.lods.size(); i++) { glGenBuffers(1, &s->lods[i].index_buffer); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, s->lods[i].index_buffer); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ELEMENT_ARRAY_BUFFER, s->lods[i].index_buffer, p_surface.lods[i].index_data.size(), p_surface.lods[i].index_data.ptr(), GL_STATIC_DRAW, "Mesh index buffer LOD[" + itos(i) + "]"); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ELEMENT_ARRAY_BUFFER, s->lods[i].index_buffer, new_surface.lods[i].index_data.size(), new_surface.lods[i].index_data.ptr(), GL_STATIC_DRAW, "Mesh index buffer LOD[" + itos(i) + "]"); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); //unbind - s->lods[i].edge_length = p_surface.lods[i].edge_length; - s->lods[i].index_count = p_surface.lods[i].index_data.size() / (is_index_16 ? 2 : 4); - s->lods[i].index_buffer_size = p_surface.lods[i].index_data.size(); + s->lods[i].edge_length = new_surface.lods[i].edge_length; + s->lods[i].index_count = new_surface.lods[i].index_data.size() / (is_index_16 ? 2 : 4); + s->lods[i].index_buffer_size = new_surface.lods[i].index_data.size(); } } } - ERR_FAIL_COND_MSG(!p_surface.index_count && !p_surface.vertex_count, "Meshes must contain a vertex array, an index array, or both"); + ERR_FAIL_COND_MSG(!new_surface.index_count && !new_surface.vertex_count, "Meshes must contain a vertex array, an index array, or both"); - s->aabb = p_surface.aabb; - s->bone_aabbs = p_surface.bone_aabbs; //only really useful for returning them. + s->aabb = new_surface.aabb; + s->bone_aabbs = new_surface.bone_aabbs; //only really useful for returning them. - if (p_surface.skin_data.size() || mesh->blend_shape_count > 0) { + s->uv_scale = new_surface.uv_scale; + + if (new_surface.skin_data.size() || mesh->blend_shape_count > 0) { // Size must match the size of the vertex array. - int size = p_surface.vertex_data.size(); + int size = new_surface.vertex_data.size(); int vertex_size = 0; - int stride = 0; + int position_stride = 0; + int normal_tangent_stride = 0; int normal_offset = 0; int tangent_offset = 0; - if ((p_surface.format & (1 << RS::ARRAY_VERTEX))) { - if (p_surface.format & RS::ARRAY_FLAG_USE_2D_VERTICES) { + if ((new_surface.format & (1ULL << RS::ARRAY_VERTEX))) { + if (new_surface.format & RS::ARRAY_FLAG_USE_2D_VERTICES) { vertex_size = 2; + position_stride = sizeof(float) * vertex_size; } else { - vertex_size = 3; + if (new_surface.format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES) { + vertex_size = 4; + position_stride = sizeof(uint16_t) * vertex_size; + } else { + vertex_size = 3; + position_stride = sizeof(float) * vertex_size; + } } - stride = sizeof(float) * vertex_size; } - if ((p_surface.format & (1 << RS::ARRAY_NORMAL))) { - normal_offset = stride; - stride += sizeof(uint16_t) * 2; + if ((new_surface.format & (1ULL << RS::ARRAY_NORMAL))) { + normal_offset = position_stride * s->vertex_count; + normal_tangent_stride += sizeof(uint16_t) * 2; } - if ((p_surface.format & (1 << RS::ARRAY_TANGENT))) { - tangent_offset = stride; - stride += sizeof(uint16_t) * 2; + if ((new_surface.format & (1ULL << RS::ARRAY_TANGENT))) { + tangent_offset = normal_offset + normal_tangent_stride; + normal_tangent_stride += sizeof(uint16_t) * 2; } if (mesh->blend_shape_count > 0) { @@ -282,54 +317,38 @@ void MeshStorage::mesh_add_surface(RID p_mesh, const RS::SurfaceData &p_surface) glBindVertexArray(s->blend_shapes[i].vertex_array); glGenBuffers(1, &s->blend_shapes[i].vertex_buffer); glBindBuffer(GL_ARRAY_BUFFER, s->blend_shapes[i].vertex_buffer); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s->blend_shapes[i].vertex_buffer, size, p_surface.blend_shape_data.ptr() + i * size, (s->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW, "Mesh blend shape buffer"); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s->blend_shapes[i].vertex_buffer, size, new_surface.blend_shape_data.ptr() + i * size, (s->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? GL_DYNAMIC_DRAW : GL_STATIC_DRAW, "Mesh blend shape buffer"); - if ((p_surface.format & (1 << RS::ARRAY_VERTEX))) { + if ((new_surface.format & (1ULL << RS::ARRAY_VERTEX))) { glEnableVertexAttribArray(RS::ARRAY_VERTEX + 3); - glVertexAttribPointer(RS::ARRAY_VERTEX + 3, vertex_size, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(0)); + glVertexAttribPointer(RS::ARRAY_VERTEX + 3, vertex_size, GL_FLOAT, GL_FALSE, position_stride, CAST_INT_TO_UCHAR_PTR(0)); } - if ((p_surface.format & (1 << RS::ARRAY_NORMAL))) { + if ((new_surface.format & (1ULL << RS::ARRAY_NORMAL))) { + // Normal and tangent are packed into the same attribute. glEnableVertexAttribArray(RS::ARRAY_NORMAL + 3); - glVertexAttribPointer(RS::ARRAY_NORMAL + 3, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(normal_offset)); + glVertexAttribPointer(RS::ARRAY_NORMAL + 3, 2, GL_UNSIGNED_SHORT, GL_TRUE, normal_tangent_stride, CAST_INT_TO_UCHAR_PTR(normal_offset)); } - if ((p_surface.format & (1 << RS::ARRAY_TANGENT))) { + if ((p_surface.format & (1ULL << RS::ARRAY_TANGENT))) { glEnableVertexAttribArray(RS::ARRAY_TANGENT + 3); - glVertexAttribPointer(RS::ARRAY_TANGENT + 3, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(tangent_offset)); + glVertexAttribPointer(RS::ARRAY_TANGENT + 3, 2, GL_UNSIGNED_SHORT, GL_TRUE, normal_tangent_stride, CAST_INT_TO_UCHAR_PTR(tangent_offset)); } } glBindVertexArray(0); glBindBuffer(GL_ARRAY_BUFFER, 0); } - // Create a vertex array to use for skeleton/blend shapes. - glGenVertexArrays(1, &s->skeleton_vertex_array); - glBindVertexArray(s->skeleton_vertex_array); - glBindBuffer(GL_ARRAY_BUFFER, s->vertex_buffer); - - if ((p_surface.format & (1 << RS::ARRAY_VERTEX))) { - glEnableVertexAttribArray(RS::ARRAY_VERTEX); - glVertexAttribPointer(RS::ARRAY_VERTEX, vertex_size, GL_FLOAT, GL_FALSE, stride, CAST_INT_TO_UCHAR_PTR(0)); - } - if ((p_surface.format & (1 << RS::ARRAY_NORMAL))) { - glEnableVertexAttribArray(RS::ARRAY_NORMAL); - glVertexAttribPointer(RS::ARRAY_NORMAL, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(normal_offset)); - } - if ((p_surface.format & (1 << RS::ARRAY_TANGENT))) { - glEnableVertexAttribArray(RS::ARRAY_TANGENT); - glVertexAttribPointer(RS::ARRAY_TANGENT, 2, GL_UNSIGNED_SHORT, GL_TRUE, stride, CAST_INT_TO_UCHAR_PTR(tangent_offset)); - } glBindVertexArray(0); glBindBuffer(GL_ARRAY_BUFFER, 0); } if (mesh->surface_count == 0) { - mesh->aabb = p_surface.aabb; + mesh->aabb = new_surface.aabb; } else { - mesh->aabb.merge_with(p_surface.aabb); + mesh->aabb.merge_with(new_surface.aabb); } mesh->skeleton_aabb_version = 0; - s->material = p_surface.material; + s->material = new_surface.material; mesh->surfaces = (Mesh::Surface **)memrealloc(mesh->surfaces, sizeof(Mesh::Surface *) * (mesh->surface_count + 1)); mesh->surfaces[mesh->surface_count] = s; @@ -479,6 +498,8 @@ RS::SurfaceData MeshStorage::mesh_get_surface(RID p_mesh, int p_surface) const { } } + sd.uv_scale = s.uv_scale; + return sd; } @@ -696,10 +717,6 @@ void MeshStorage::mesh_clear(RID p_mesh) { } memdelete_arr(s.blend_shapes); } - if (s.skeleton_vertex_array != 0) { - glDeleteVertexArrays(1, &s.skeleton_vertex_array); - s.skeleton_vertex_array = 0; - } memdelete(mesh->surfaces[i]); } @@ -720,15 +737,16 @@ void MeshStorage::mesh_clear(RID p_mesh) { } } -void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint32_t p_input_mask, MeshInstance::Surface *mis) { +void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint64_t p_input_mask, MeshInstance::Surface *mis) { Mesh::Surface::Attrib attribs[RS::ARRAY_MAX]; + int position_stride = 0; // Vertex position only. + int normal_tangent_stride = 0; int attributes_stride = 0; - int vertex_stride = 0; int skin_stride = 0; for (int i = 0; i < RS::ARRAY_INDEX; i++) { - if (!(s->format & (1 << i))) { + if (!(s->format & (1ULL << i))) { attribs[i].enabled = false; attribs[i].integer = false; continue; @@ -739,29 +757,55 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V switch (i) { case RS::ARRAY_VERTEX: { - attribs[i].offset = vertex_stride; + attribs[i].offset = 0; + attribs[i].type = GL_FLOAT; + attribs[i].normalized = GL_FALSE; if (s->format & RS::ARRAY_FLAG_USE_2D_VERTICES) { attribs[i].size = 2; + position_stride = attribs[i].size * sizeof(float); } else { - attribs[i].size = 3; + if (!mis && (s->format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES)) { + attribs[i].size = 4; + position_stride = attribs[i].size * sizeof(uint16_t); + attribs[i].type = GL_UNSIGNED_SHORT; + attribs[i].normalized = GL_TRUE; + } else { + attribs[i].size = 3; + position_stride = attribs[i].size * sizeof(float); + } } - attribs[i].type = GL_FLOAT; - vertex_stride += attribs[i].size * sizeof(float); - attribs[i].normalized = GL_FALSE; } break; case RS::ARRAY_NORMAL: { - attribs[i].offset = vertex_stride; - attribs[i].size = 2; + if (!mis && (s->format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES)) { + attribs[i].size = 2; + normal_tangent_stride += 2 * attribs[i].size; + } else { + attribs[i].size = 4; + // A small trick here: if we are uncompressed and we have normals, but no tangents. We need + // the shader to think there are 4 components to "axis_tangent_attrib". So we give a size of 4, + // but a stride based on only having 2 elements. + if (!(s->format & RS::ARRAY_FORMAT_TANGENT)) { + normal_tangent_stride += (mis ? sizeof(float) : sizeof(uint16_t)) * 2; + } else { + normal_tangent_stride += (mis ? sizeof(float) : sizeof(uint16_t)) * 4; + } + } + + if (mis) { + // Transform feedback has interleave all or no attributes. It can't mix interleaving. + attribs[i].offset = position_stride; + normal_tangent_stride += position_stride; + position_stride = normal_tangent_stride; + } else { + attribs[i].offset = position_stride * s->vertex_count; + } attribs[i].type = (mis ? GL_FLOAT : GL_UNSIGNED_SHORT); - vertex_stride += sizeof(uint16_t) * 2 * (mis ? 2 : 1); attribs[i].normalized = GL_TRUE; } break; case RS::ARRAY_TANGENT: { - attribs[i].offset = vertex_stride; - attribs[i].size = 2; - attribs[i].type = (mis ? GL_FLOAT : GL_UNSIGNED_SHORT); - vertex_stride += sizeof(uint16_t) * 2 * (mis ? 2 : 1); - attribs[i].normalized = GL_TRUE; + // We never use the tangent attribute. It is always packed in ARRAY_NORMAL, or ARRAY_VERTEX. + attribs[i].enabled = false; + attribs[i].integer = false; } break; case RS::ARRAY_COLOR: { attribs[i].offset = attributes_stride; @@ -773,16 +817,28 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V case RS::ARRAY_TEX_UV: { attribs[i].offset = attributes_stride; attribs[i].size = 2; - attribs[i].type = GL_FLOAT; - attributes_stride += 2 * sizeof(float); - attribs[i].normalized = GL_FALSE; + if (s->format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES) { + attribs[i].type = GL_UNSIGNED_SHORT; + attributes_stride += 2 * sizeof(uint16_t); + attribs[i].normalized = GL_TRUE; + } else { + attribs[i].type = GL_FLOAT; + attributes_stride += 2 * sizeof(float); + attribs[i].normalized = GL_FALSE; + } } break; case RS::ARRAY_TEX_UV2: { attribs[i].offset = attributes_stride; attribs[i].size = 2; - attribs[i].type = GL_FLOAT; - attributes_stride += 2 * sizeof(float); - attribs[i].normalized = GL_FALSE; + if (s->format & RS::ARRAY_FLAG_COMPRESS_ATTRIBUTES) { + attribs[i].type = GL_UNSIGNED_SHORT; + attributes_stride += 2 * sizeof(uint16_t); + attribs[i].normalized = GL_TRUE; + } else { + attribs[i].type = GL_FLOAT; + attributes_stride += 2 * sizeof(float); + attribs[i].normalized = GL_FALSE; + } } break; case RS::ARRAY_CUSTOM0: case RS::ARRAY_CUSTOM1: @@ -828,7 +884,7 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V continue; } if (i <= RS::ARRAY_TANGENT) { - attribs[i].stride = vertex_stride; + attribs[i].stride = (i == RS::ARRAY_VERTEX) ? position_stride : normal_tangent_stride; if (mis) { glBindBuffer(GL_ARRAY_BUFFER, mis->vertex_buffer); } else { @@ -946,7 +1002,7 @@ void MeshStorage::_mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint3 if ((mesh->blend_shape_count > 0 || (mesh->surfaces[p_surface]->format & RS::ARRAY_FORMAT_BONES)) && mesh->surfaces[p_surface]->vertex_buffer_size > 0) { // Cache surface properties s.format_cache = mesh->surfaces[p_surface]->format; - if ((s.format_cache & (1 << RS::ARRAY_VERTEX))) { + if ((s.format_cache & (1ULL << RS::ARRAY_VERTEX))) { if (s.format_cache & RS::ARRAY_FLAG_USE_2D_VERTICES) { s.vertex_size_cache = 2; } else { @@ -954,25 +1010,27 @@ void MeshStorage::_mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint3 } s.vertex_stride_cache = sizeof(float) * s.vertex_size_cache; } - if ((s.format_cache & (1 << RS::ARRAY_NORMAL))) { + if ((s.format_cache & (1ULL << RS::ARRAY_NORMAL))) { s.vertex_normal_offset_cache = s.vertex_stride_cache; s.vertex_stride_cache += sizeof(uint32_t) * 2; } - if ((s.format_cache & (1 << RS::ARRAY_TANGENT))) { + if ((s.format_cache & (1ULL << RS::ARRAY_TANGENT))) { s.vertex_tangent_offset_cache = s.vertex_stride_cache; s.vertex_stride_cache += sizeof(uint32_t) * 2; } + int buffer_size = s.vertex_stride_cache * mesh->surfaces[p_surface]->vertex_count; + // Buffer to be used for rendering. Final output of skeleton and blend shapes. glGenBuffers(1, &s.vertex_buffer); glBindBuffer(GL_ARRAY_BUFFER, s.vertex_buffer); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s.vertex_buffer, s.vertex_stride_cache * mesh->surfaces[p_surface]->vertex_count, nullptr, GL_DYNAMIC_DRAW, "MeshInstance vertex buffer"); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s.vertex_buffer, buffer_size, nullptr, GL_DYNAMIC_DRAW, "MeshInstance vertex buffer"); if (mesh->blend_shape_count > 0) { // Ping-Pong buffers for processing blendshapes. glGenBuffers(2, s.vertex_buffers); for (uint32_t i = 0; i < 2; i++) { glBindBuffer(GL_ARRAY_BUFFER, s.vertex_buffers[i]); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s.vertex_buffers[i], s.vertex_stride_cache * mesh->surfaces[p_surface]->vertex_count, nullptr, GL_DYNAMIC_DRAW, "MeshInstance process buffer[" + itos(i) + "]"); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s.vertex_buffers[i], buffer_size, nullptr, GL_DYNAMIC_DRAW, "MeshInstance process buffer[" + itos(i) + "]"); } } glBindBuffer(GL_ARRAY_BUFFER, 0); //unbind @@ -1011,19 +1069,19 @@ void MeshStorage::mesh_instance_set_canvas_item_transform(RID p_mesh_instance, c void MeshStorage::_blend_shape_bind_mesh_instance_buffer(MeshInstance *p_mi, uint32_t p_surface) { glBindBuffer(GL_ARRAY_BUFFER, p_mi->surfaces[p_surface].vertex_buffers[0]); - if ((p_mi->surfaces[p_surface].format_cache & (1 << RS::ARRAY_VERTEX))) { + if ((p_mi->surfaces[p_surface].format_cache & (1ULL << RS::ARRAY_VERTEX))) { glEnableVertexAttribArray(RS::ARRAY_VERTEX); glVertexAttribPointer(RS::ARRAY_VERTEX, p_mi->surfaces[p_surface].vertex_size_cache, GL_FLOAT, GL_FALSE, p_mi->surfaces[p_surface].vertex_stride_cache, CAST_INT_TO_UCHAR_PTR(0)); } else { glDisableVertexAttribArray(RS::ARRAY_VERTEX); } - if ((p_mi->surfaces[p_surface].format_cache & (1 << RS::ARRAY_NORMAL))) { + if ((p_mi->surfaces[p_surface].format_cache & (1ULL << RS::ARRAY_NORMAL))) { glEnableVertexAttribArray(RS::ARRAY_NORMAL); glVertexAttribIPointer(RS::ARRAY_NORMAL, 2, GL_UNSIGNED_INT, p_mi->surfaces[p_surface].vertex_stride_cache, CAST_INT_TO_UCHAR_PTR(p_mi->surfaces[p_surface].vertex_normal_offset_cache)); } else { glDisableVertexAttribArray(RS::ARRAY_NORMAL); } - if ((p_mi->surfaces[p_surface].format_cache & (1 << RS::ARRAY_TANGENT))) { + if ((p_mi->surfaces[p_surface].format_cache & (1ULL << RS::ARRAY_TANGENT))) { glEnableVertexAttribArray(RS::ARRAY_TANGENT); glVertexAttribIPointer(RS::ARRAY_TANGENT, 2, GL_UNSIGNED_INT, p_mi->surfaces[p_surface].vertex_stride_cache, CAST_INT_TO_UCHAR_PTR(p_mi->surfaces[p_surface].vertex_tangent_offset_cache)); } else { @@ -1091,7 +1149,7 @@ void MeshStorage::update_mesh_instances() { } for (uint32_t i = 0; i < mi->surfaces.size(); i++) { - if (mi->surfaces[i].vertex_buffer == 0 || mi->mesh->surfaces[i]->skeleton_vertex_array == 0) { + if (mi->surfaces[i].vertex_buffer == 0) { continue; } @@ -1106,10 +1164,10 @@ void MeshStorage::update_mesh_instances() { specialization |= array_is_2d ? SkeletonShaderGLES3::MODE_2D : 0; specialization |= SkeletonShaderGLES3::USE_BLEND_SHAPES; if (!array_is_2d) { - if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_NORMAL))) { + if ((mi->surfaces[i].format_cache & (1ULL << RS::ARRAY_NORMAL))) { specialization |= SkeletonShaderGLES3::USE_NORMAL; } - if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_TANGENT))) { + if ((mi->surfaces[i].format_cache & (1ULL << RS::ARRAY_TANGENT))) { specialization |= SkeletonShaderGLES3::USE_TANGENT; } } @@ -1123,7 +1181,12 @@ void MeshStorage::update_mesh_instances() { skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_SHAPE_COUNT, float(mi->mesh->blend_shape_count), skeleton_shader.shader_version, variant, specialization); glBindBuffer(GL_ARRAY_BUFFER, 0); - glBindVertexArray(mi->mesh->surfaces[i]->skeleton_vertex_array); + GLuint vertex_array_gl = 0; + uint64_t mask = ((1 << 10) - 1) << 3; // Mask from ARRAY_FORMAT_COLOR to ARRAY_FORMAT_INDEX. + mask = ~mask; + uint64_t format = mi->surfaces[i].format_cache & mask; // Format should only have vertex, normal, tangent (as necessary) + compressions. + mesh_surface_get_vertex_arrays_and_format(mi->mesh->surfaces[i], format, vertex_array_gl); + glBindVertexArray(vertex_array_gl); glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].vertex_buffers[0]); glBeginTransformFeedback(GL_POINTS); glDrawArrays(GL_POINTS, 0, mi->mesh->surfaces[i]->vertex_count); @@ -1210,10 +1273,10 @@ void MeshStorage::update_mesh_instances() { specialization |= SkeletonShaderGLES3::FINAL_PASS; specialization |= use_8_weights ? SkeletonShaderGLES3::USE_EIGHT_WEIGHTS : 0; if (!array_is_2d) { - if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_NORMAL))) { + if ((mi->surfaces[i].format_cache & (1ULL << RS::ARRAY_NORMAL))) { specialization |= SkeletonShaderGLES3::USE_NORMAL; } - if ((mi->surfaces[i].format_cache & (1 << RS::ARRAY_TANGENT))) { + if ((mi->surfaces[i].format_cache & (1ULL << RS::ARRAY_TANGENT))) { specialization |= SkeletonShaderGLES3::USE_TANGENT; } } @@ -1233,7 +1296,12 @@ void MeshStorage::update_mesh_instances() { skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::INVERSE_TRANSFORM_Y, inverse_transform[1], skeleton_shader.shader_version, variant, specialization); skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::INVERSE_TRANSFORM_OFFSET, inverse_transform[2], skeleton_shader.shader_version, variant, specialization); - glBindVertexArray(mi->mesh->surfaces[i]->skeleton_vertex_array); + GLuint vertex_array_gl = 0; + uint64_t mask = ((1 << 10) - 1) << 3; // Mask from ARRAY_FORMAT_COLOR to ARRAY_FORMAT_INDEX. + mask = ~mask; + uint64_t format = mi->surfaces[i].format_cache & mask; // Format should only have vertex, normal, tangent (as necessary) + compressions. + mesh_surface_get_vertex_arrays_and_format(mi->mesh->surfaces[i], format, vertex_array_gl); + glBindVertexArray(vertex_array_gl); _compute_skeleton(mi, sk, i); } } diff --git a/drivers/gles3/storage/mesh_storage.h b/drivers/gles3/storage/mesh_storage.h index 09212e4b5c..25b15ab6a6 100644 --- a/drivers/gles3/storage/mesh_storage.h +++ b/drivers/gles3/storage/mesh_storage.h @@ -58,7 +58,7 @@ struct Mesh { uint32_t offset; }; RS::PrimitiveType primitive = RS::PRIMITIVE_POINTS; - uint32_t format = 0; + uint64_t format = 0; GLuint vertex_buffer = 0; GLuint attribute_buffer = 0; @@ -98,6 +98,8 @@ struct Mesh { Vector<AABB> bone_aabbs; + Vector4 uv_scale; + struct BlendShape { GLuint vertex_buffer = 0; GLuint vertex_array = 0; @@ -144,7 +146,7 @@ struct MeshInstance { int vertex_size_cache = 0; int vertex_normal_offset_cache = 0; int vertex_tangent_offset_cache = 0; - uint32_t format_cache = 0; + uint64_t format_cache = 0; Mesh::Surface::Version *versions = nullptr; //allocated on demand uint32_t version_count = 0; @@ -221,7 +223,7 @@ private: mutable RID_Owner<Mesh, true> mesh_owner; - void _mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint32_t p_input_mask, MeshInstance::Surface *mis = nullptr); + void _mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint64_t p_input_mask, MeshInstance::Surface *mis = nullptr); /* Mesh Instance API */ @@ -381,18 +383,18 @@ public: } // Use this to cache Vertex Array Objects so they are only generated once - _FORCE_INLINE_ void mesh_surface_get_vertex_arrays_and_format(void *p_surface, uint32_t p_input_mask, GLuint &r_vertex_array_gl) { + _FORCE_INLINE_ void mesh_surface_get_vertex_arrays_and_format(void *p_surface, uint64_t p_input_mask, GLuint &r_vertex_array_gl) { Mesh::Surface *s = reinterpret_cast<Mesh::Surface *>(p_surface); s->version_lock.lock(); - //there will never be more than, at much, 3 or 4 versions, so iterating is the fastest way + // There will never be more than 3 or 4 versions, so iterating is the fastest way. for (uint32_t i = 0; i < s->version_count; i++) { if (s->versions[i].input_mask != p_input_mask) { continue; } - //we have this version, hooray + // We have this version, hooray. r_vertex_array_gl = s->versions[i].vertex_array; s->version_lock.unlock(); return; @@ -424,7 +426,7 @@ public: // TODO: considering hashing versions with multimesh buffer RID. // Doing so would allow us to avoid specifying multimesh buffer pointers every frame and may improve performance. - _FORCE_INLINE_ void mesh_instance_surface_get_vertex_arrays_and_format(RID p_mesh_instance, uint32_t p_surface_index, uint32_t p_input_mask, GLuint &r_vertex_array_gl) { + _FORCE_INLINE_ void mesh_instance_surface_get_vertex_arrays_and_format(RID p_mesh_instance, uint32_t p_surface_index, uint64_t p_input_mask, GLuint &r_vertex_array_gl) { MeshInstance *mi = mesh_instance_owner.get_or_null(p_mesh_instance); ERR_FAIL_NULL(mi); Mesh *mesh = mi->mesh; diff --git a/drivers/gles3/storage/particles_storage.cpp b/drivers/gles3/storage/particles_storage.cpp index 0a6f02511c..1caa3bbe35 100644 --- a/drivers/gles3/storage/particles_storage.cpp +++ b/drivers/gles3/storage/particles_storage.cpp @@ -94,13 +94,15 @@ RID ParticlesStorage::particles_allocate() { } void ParticlesStorage::particles_initialize(RID p_rid) { - particles_owner.initialize_rid(p_rid, Particles()); + particles_owner.initialize_rid(p_rid); } void ParticlesStorage::particles_free(RID p_rid) { - update_particles(); Particles *particles = particles_owner.get_or_null(p_rid); + particles->dependency.deleted_notify(p_rid); + particles->update_list.remove_from_list(); + _particles_free_data(particles); particles_owner.free(p_rid); } @@ -190,6 +192,13 @@ void ParticlesStorage::particles_set_amount(RID p_particles, int p_amount) { particles->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_PARTICLES); } +void ParticlesStorage::particles_set_amount_ratio(RID p_particles, float p_amount_ratio) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_NULL(particles); + + particles->amount_ratio = p_amount_ratio; +} + void ParticlesStorage::particles_set_lifetime(RID p_particles, double p_lifetime) { Particles *particles = particles_owner.get_or_null(p_particles); ERR_FAIL_NULL(particles); @@ -355,8 +364,10 @@ void ParticlesStorage::particles_request_process(RID p_particles) { if (!particles->dirty) { particles->dirty = true; - particles->update_list = particle_update_list; - particle_update_list = particles; + + if (!particles->update_list.in_list()) { + particle_update_list.add(&particles->update_list); + } } } @@ -431,6 +442,20 @@ void ParticlesStorage::particles_set_emission_transform(RID p_particles, const T particles->emission_transform = p_transform; } +void ParticlesStorage::particles_set_emitter_velocity(RID p_particles, const Vector3 &p_velocity) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_NULL(particles); + + particles->emitter_velocity = p_velocity; +} + +void ParticlesStorage::particles_set_interp_to_end(RID p_particles, float p_interp) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_NULL(particles); + + particles->interp_to_end = p_interp; +} + int ParticlesStorage::particles_get_draw_passes(RID p_particles) const { const Particles *particles = particles_owner.get_or_null(p_particles); ERR_FAIL_NULL_V(particles, 0); @@ -507,9 +532,13 @@ void ParticlesStorage::_particles_process(Particles *p_particles, double p_delta frame_params.cycle = p_particles->cycle_number; frame_params.frame = p_particles->frame_counter++; - frame_params.pad0 = 0; + frame_params.amount_ratio = p_particles->amount_ratio; frame_params.pad1 = 0; frame_params.pad2 = 0; + frame_params.interp_to_end = p_particles->interp_to_end; + frame_params.emitter_velocity[0] = p_particles->emitter_velocity.x; + frame_params.emitter_velocity[1] = p_particles->emitter_velocity.y; + frame_params.emitter_velocity[2] = p_particles->emitter_velocity.z; { //collision and attractors @@ -775,7 +804,7 @@ void ParticlesStorage::particles_set_view_axis(RID p_particles, const Vector3 &p LocalVector<ParticleInstanceData3D> particle_vector; particle_vector.resize(particles->amount); particle_array = particle_vector.ptr(); - glGetBufferSubData(GL_ARRAY_BUFFER, 0, particles->amount * sizeof(ParticleInstanceData3D), particle_array); + godot_webgl2_glGetBufferSubData(GL_ARRAY_BUFFER, 0, particles->amount * sizeof(ParticleInstanceData3D), particle_array); #endif SortArray<ParticleInstanceData3D, ParticlesViewSort> sorter; sorter.compare.z_dir = axis; @@ -978,13 +1007,12 @@ void ParticlesStorage::update_particles() { glBindBufferBase(GL_UNIFORM_BUFFER, PARTICLES_GLOBALS_UNIFORM_LOCATION, global_buffer); glBindBuffer(GL_UNIFORM_BUFFER, 0); - while (particle_update_list) { + while (particle_update_list.first()) { // Use transform feedback to process particles. - Particles *particles = particle_update_list; + Particles *particles = particle_update_list.first()->self(); - particle_update_list = particles->update_list; - particles->update_list = nullptr; + particles->update_list.remove_from_list(); particles->dirty = false; _particles_update_buffers(particles); @@ -1133,7 +1161,7 @@ void ParticlesStorage::_particles_reverse_lifetime_sort(Particles *particles) { LocalVector<ParticleInstanceData> particle_vector; particle_vector.resize(particles->amount); particle_array = particle_vector.ptr(); - glGetBufferSubData(GL_ARRAY_BUFFER, 0, buffer_size, particle_array); + godot_webgl2_glGetBufferSubData(GL_ARRAY_BUFFER, 0, buffer_size, particle_array); #endif uint32_t lifetime_split = (MIN(int(particles->amount * particles->sort_buffer_phase), particles->amount - 1) + 1) % particles->amount; diff --git a/drivers/gles3/storage/particles_storage.h b/drivers/gles3/storage/particles_storage.h index 5146dc5329..8451986a61 100644 --- a/drivers/gles3/storage/particles_storage.h +++ b/drivers/gles3/storage/particles_storage.h @@ -128,7 +128,7 @@ private: float delta; float particle_size; - float pad0; + float amount_ratio; float pad1; float pad2; @@ -138,6 +138,8 @@ private: uint32_t frame; float emission_transform[16]; + float emitter_velocity[3]; + float interp_to_end; Attractor attractors[MAX_ATTRACTORS]; Collider colliders[MAX_COLLIDERS]; @@ -149,6 +151,7 @@ private: double inactive_time = 0.0; bool emitting = false; bool one_shot = false; + float amount_ratio = 1.0; int amount = 0; double lifetime = 1.0; double pre_process_time = 0.0; @@ -209,7 +212,7 @@ private: uint32_t userdata_count = 0; bool dirty = false; - Particles *update_list = nullptr; + SelfList<Particles> update_list; double phase = 0.0; double prev_phase = 0.0; @@ -229,6 +232,8 @@ private: bool clear = true; Transform3D emission_transform; + Vector3 emitter_velocity; + float interp_to_end; HashSet<RID> collisions; @@ -237,7 +242,8 @@ private: double trail_length = 1.0; bool trails_enabled = false; - Particles() { + Particles() : + update_list(this) { } }; @@ -259,7 +265,7 @@ private: RID copy_shader_version; } particles_shader; - Particles *particle_update_list = nullptr; + SelfList<Particles>::List particle_update_list; mutable RID_Owner<Particles, true> particles_owner; @@ -313,6 +319,7 @@ public: virtual void particles_emit(RID p_particles, const Transform3D &p_transform, const Vector3 &p_velocity, const Color &p_color, const Color &p_custom, uint32_t p_emit_flags) override; virtual void particles_set_emitting(RID p_particles, bool p_emitting) override; virtual void particles_set_amount(RID p_particles, int p_amount) override; + virtual void particles_set_amount_ratio(RID p_particles, float p_amount_ratio) override; virtual void particles_set_lifetime(RID p_particles, double p_lifetime) override; virtual void particles_set_one_shot(RID p_particles, bool p_one_shot) override; virtual void particles_set_pre_process_time(RID p_particles, double p_time) override; @@ -347,6 +354,8 @@ public: virtual AABB particles_get_aabb(RID p_particles) const override; virtual void particles_set_emission_transform(RID p_particles, const Transform3D &p_transform) override; + virtual void particles_set_emitter_velocity(RID p_particles, const Vector3 &p_velocity) override; + virtual void particles_set_interp_to_end(RID p_particles, float p_interp) override; virtual bool particles_get_emitting(RID p_particles) override; virtual int particles_get_draw_passes(RID p_particles) const override; diff --git a/drivers/gles3/storage/utilities.cpp b/drivers/gles3/storage/utilities.cpp index 07df445018..7deeefc37d 100644 --- a/drivers/gles3/storage/utilities.cpp +++ b/drivers/gles3/storage/utilities.cpp @@ -115,7 +115,7 @@ Vector<uint8_t> Utilities::buffer_get_data(GLenum p_target, GLuint p_buffer, uin #if defined(__EMSCRIPTEN__) { uint8_t *w = ret.ptrw(); - glGetBufferSubData(p_target, 0, p_buffer_size, w); + godot_webgl2_glGetBufferSubData(p_target, 0, p_buffer_size, w); } #else void *data = glMapBufferRange(p_target, 0, p_buffer_size, GL_MAP_READ_BIT); diff --git a/drivers/unix/SCsub b/drivers/unix/SCsub index 91ef613546..146563a3b6 100644 --- a/drivers/unix/SCsub +++ b/drivers/unix/SCsub @@ -4,4 +4,4 @@ Import("env") env.add_source_files(env.drivers_sources, "*.cpp") -env["check_c_headers"] = [["mntent.h", "HAVE_MNTENT"]] +env["check_c_headers"] = {"mntent.h": "HAVE_MNTENT"} diff --git a/drivers/unix/dir_access_unix.cpp b/drivers/unix/dir_access_unix.cpp index a162f46103..46efb45934 100644 --- a/drivers/unix/dir_access_unix.cpp +++ b/drivers/unix/dir_access_unix.cpp @@ -38,9 +38,11 @@ #include "core/templates/list.h" #include <errno.h> +#include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <sys/ioctl.h> #include <sys/statvfs.h> #ifdef HAVE_MNTENT @@ -339,7 +341,7 @@ Error DirAccessUnix::change_dir(String p_dir) { // prev_dir is the directory we are changing out of String prev_dir; char real_current_dir_name[2048]; - ERR_FAIL_COND_V(getcwd(real_current_dir_name, 2048) == nullptr, ERR_BUG); + ERR_FAIL_NULL_V(getcwd(real_current_dir_name, 2048), ERR_BUG); if (prev_dir.parse_utf8(real_current_dir_name) != OK) { prev_dir = real_current_dir_name; //no utf8, maybe latin? } @@ -361,7 +363,7 @@ Error DirAccessUnix::change_dir(String p_dir) { String base = _get_root_path(); if (!base.is_empty() && !try_dir.begins_with(base)) { - ERR_FAIL_COND_V(getcwd(real_current_dir_name, 2048) == nullptr, ERR_BUG); + ERR_FAIL_NULL_V(getcwd(real_current_dir_name, 2048), ERR_BUG); String new_dir; new_dir.parse_utf8(real_current_dir_name); @@ -488,6 +490,27 @@ bool DirAccessUnix::is_hidden(const String &p_name) { return p_name != "." && p_name != ".." && p_name.begins_with("."); } +bool DirAccessUnix::is_case_sensitive(const String &p_path) const { +#if defined(LINUXBSD_ENABLED) + String f = p_path; + if (!f.is_absolute_path()) { + f = get_current_dir().path_join(f); + } + f = fix_path(f); + + int fd = ::open(f.utf8().get_data(), O_RDONLY | O_NONBLOCK); + if (fd) { + long flags = 0; + if (ioctl(fd, _IOR('f', 1, long), &flags) >= 0) { + ::close(fd); + return !(flags & 0x40000000 /* FS_CASEFOLD_FL */); + } + ::close(fd); + } +#endif + return true; +} + DirAccessUnix::DirAccessUnix() { dir_stream = nullptr; _cisdir = false; @@ -496,7 +519,7 @@ DirAccessUnix::DirAccessUnix() { // set current directory to an absolute path of the current directory char real_current_dir_name[2048]; - ERR_FAIL_COND(getcwd(real_current_dir_name, 2048) == nullptr); + ERR_FAIL_NULL(getcwd(real_current_dir_name, 2048)); if (current_dir.parse_utf8(real_current_dir_name) != OK) { current_dir = real_current_dir_name; } diff --git a/drivers/unix/dir_access_unix.h b/drivers/unix/dir_access_unix.h index 68ad869003..8d13ff1fa8 100644 --- a/drivers/unix/dir_access_unix.h +++ b/drivers/unix/dir_access_unix.h @@ -82,6 +82,8 @@ public: virtual String read_link(String p_file) override; virtual Error create_link(String p_source, String p_target) override; + virtual bool is_case_sensitive(const String &p_path) const override; + virtual uint64_t get_space_left() override; virtual String get_filesystem_type() const override; diff --git a/drivers/unix/os_unix.cpp b/drivers/unix/os_unix.cpp index 4d9549c5a6..581daaca05 100644 --- a/drivers/unix/os_unix.cpp +++ b/drivers/unix/os_unix.cpp @@ -81,6 +81,10 @@ #include <time.h> #include <unistd.h> +#ifndef RTLD_DEEPBIND +#define RTLD_DEEPBIND 0 +#endif + #if defined(MACOS_ENABLED) || (defined(__ANDROID_API__) && __ANDROID_API__ >= 28) // Random location for getentropy. Fitting. #include <sys/random.h> @@ -646,7 +650,7 @@ Error OS_Unix::open_dynamic_library(const String p_path, void *&p_library_handle path = get_executable_path().get_base_dir().path_join("../lib").path_join(p_path.get_file()); } - p_library_handle = dlopen(path.utf8().get_data(), RTLD_NOW); + p_library_handle = dlopen(path.utf8().get_data(), RTLD_NOW | RTLD_DEEPBIND); ERR_FAIL_NULL_V_MSG(p_library_handle, ERR_CANT_OPEN, vformat("Can't open dynamic library: %s. Error: %s.", p_path, dlerror())); if (r_resolved_path != nullptr) { diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 11321b2121..5c68149a5f 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -4499,6 +4499,7 @@ RID RenderingDeviceVulkan::vertex_array_create(uint32_t p_vertex_count, VertexFo if (atf.frequency == VERTEX_FREQUENCY_VERTEX) { // Validate size for regular drawing. uint64_t total_size = uint64_t(atf.stride) * (p_vertex_count - 1) + atf.offset + element_size; + ERR_FAIL_COND_V_MSG(total_size > buffer->size, RID(), "Attachment (" + itos(i) + ") will read past the end of the buffer."); @@ -4665,7 +4666,7 @@ struct RenderingDeviceVulkanShaderBinarySpecializationConstant { }; struct RenderingDeviceVulkanShaderBinaryData { - uint32_t vertex_input_mask; + uint64_t vertex_input_mask; uint32_t fragment_output_mask; uint32_t specialization_constants_count; uint32_t is_compute; @@ -4881,7 +4882,7 @@ RID RenderingDeviceVulkan::shader_create_from_bytecode(const Vector<uint8_t> &p_ push_constant.size = binary_data.push_constant_size; push_constant.vk_stages_mask = binary_data.push_constant_vk_stages_mask; - uint32_t vertex_input_mask = binary_data.vertex_input_mask; + uint64_t vertex_input_mask = binary_data.vertex_input_mask; uint32_t fragment_output_mask = binary_data.fragment_output_mask; @@ -5054,6 +5055,7 @@ RID RenderingDeviceVulkan::shader_create_from_bytecode(const Vector<uint8_t> &p_ } Shader *shader = shader_owner.get_or_null(id); + ERR_FAIL_NULL_V(shader, RID()); shader->vertex_input_mask = vertex_input_mask; shader->fragment_output_mask = fragment_output_mask; @@ -5208,7 +5210,7 @@ RID RenderingDeviceVulkan::shader_create_placeholder() { return shader_owner.make_rid(shader); } -uint32_t RenderingDeviceVulkan::shader_get_vertex_input_attribute_mask(RID p_shader) { +uint64_t RenderingDeviceVulkan::shader_get_vertex_input_attribute_mask(RID p_shader) { _THREAD_SAFE_METHOD_ const Shader *shader = shader_owner.get_or_null(p_shader); @@ -6151,8 +6153,8 @@ RID RenderingDeviceVulkan::render_pipeline_create(RID p_shader, FramebufferForma pipeline_vertex_input_state_create_info = vd.create_info; // Validate with inputs. - for (uint32_t i = 0; i < 32; i++) { - if (!(shader->vertex_input_mask & (1UL << i))) { + for (uint64_t i = 0; i < 64; i++) { + if (!(shader->vertex_input_mask & (1ULL << i))) { continue; } bool found = false; diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index fd14449ee7..7c514c11f8 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -621,7 +621,7 @@ class RenderingDeviceVulkan : public RenderingDevice { VkDescriptorSetLayout descriptor_set_layout = VK_NULL_HANDLE; }; - uint32_t vertex_input_mask = 0; // Inputs used, this is mostly for validation. + uint64_t vertex_input_mask = 0; // Inputs used, this is mostly for validation. uint32_t fragment_output_mask = 0; struct PushConstant { @@ -1140,7 +1140,7 @@ public: virtual RID shader_create_from_bytecode(const Vector<uint8_t> &p_shader_binary, RID p_placeholder = RID()); virtual RID shader_create_placeholder(); - virtual uint32_t shader_get_vertex_input_attribute_mask(RID p_shader); + virtual uint64_t shader_get_vertex_input_attribute_mask(RID p_shader); /*****************/ /**** UNIFORM ****/ diff --git a/drivers/vulkan/vulkan_context.cpp b/drivers/vulkan/vulkan_context.cpp index 491d59bd7b..890fd7277f 100644 --- a/drivers/vulkan/vulkan_context.cpp +++ b/drivers/vulkan/vulkan_context.cpp @@ -1375,15 +1375,16 @@ Error VulkanContext::_create_physical_device(VkSurfaceKHR p_surface) { // Query fine-grained feature support for this device. // If app has specific feature requirements it should check supported // features based on this query - vkGetPhysicalDeviceFeatures(gpu, &physical_device_features); + VkPhysicalDeviceFeatures features = {}; + vkGetPhysicalDeviceFeatures(gpu, &features); // Check required features and abort if any of them is missing. - if (!physical_device_features.imageCubeArray || !physical_device_features.independentBlend) { + if (!features.imageCubeArray || !features.independentBlend) { String error_string = vformat("Your GPU (%s) does not support the following features which are required to use Vulkan-based renderers in Godot:\n\n", device_name); - if (!physical_device_features.imageCubeArray) { + if (!features.imageCubeArray) { error_string += "- No support for image cube arrays.\n"; } - if (!physical_device_features.independentBlend) { + if (!features.independentBlend) { error_string += "- No support for independentBlend.\n"; } error_string += "\nThis is usually a hardware limitation, so updating graphics drivers won't help in most cases."; @@ -1398,12 +1399,95 @@ Error VulkanContext::_create_physical_device(VkSurfaceKHR p_surface) { return ERR_CANT_CREATE; } - physical_device_features.robustBufferAccess = false; // Turn off robust buffer access, which can hamper performance on some hardware. + memset(&physical_device_features, 0, sizeof(physical_device_features)); +#define VK_DEVICEFEATURE_ENABLE_IF(x) \ + if (features.x) { \ + physical_device_features.x = features.x; \ + } else \ + ((void)0) + + // + // Opt-in to the features we actually need/use. These can be changed in the future. + // We do this for multiple reasons: + // + // 1. Certain features (like sparse* stuff) cause unnecessary internal driver allocations. + // 2. Others like shaderStorageImageMultisample are a huge red flag + // (MSAA + Storage is rarely needed). + // 3. Most features when turned off aren't actually off (we just promise the driver not to use them) + // and it is validation what will complain. This allows us to target a minimum baseline. + // + // TODO: Allow the user to override these settings (i.e. turn off more stuff) using profiles + // so they can target a broad range of HW. For example Mali HW does not have + // shaderClipDistance/shaderCullDistance; thus validation would complain if such feature is used; + // allowing them to fix the problem without even owning Mali HW to test on. + // + + // Turn off robust buffer access, which can hamper performance on some hardware. + //VK_DEVICEFEATURE_ENABLE_IF(robustBufferAccess); + VK_DEVICEFEATURE_ENABLE_IF(fullDrawIndexUint32); + VK_DEVICEFEATURE_ENABLE_IF(imageCubeArray); + VK_DEVICEFEATURE_ENABLE_IF(independentBlend); + VK_DEVICEFEATURE_ENABLE_IF(geometryShader); + VK_DEVICEFEATURE_ENABLE_IF(tessellationShader); + VK_DEVICEFEATURE_ENABLE_IF(sampleRateShading); + VK_DEVICEFEATURE_ENABLE_IF(dualSrcBlend); + VK_DEVICEFEATURE_ENABLE_IF(logicOp); + VK_DEVICEFEATURE_ENABLE_IF(multiDrawIndirect); + VK_DEVICEFEATURE_ENABLE_IF(drawIndirectFirstInstance); + VK_DEVICEFEATURE_ENABLE_IF(depthClamp); + VK_DEVICEFEATURE_ENABLE_IF(depthBiasClamp); + VK_DEVICEFEATURE_ENABLE_IF(fillModeNonSolid); + VK_DEVICEFEATURE_ENABLE_IF(depthBounds); + VK_DEVICEFEATURE_ENABLE_IF(wideLines); + VK_DEVICEFEATURE_ENABLE_IF(largePoints); + VK_DEVICEFEATURE_ENABLE_IF(alphaToOne); + VK_DEVICEFEATURE_ENABLE_IF(multiViewport); + VK_DEVICEFEATURE_ENABLE_IF(samplerAnisotropy); + VK_DEVICEFEATURE_ENABLE_IF(textureCompressionETC2); + VK_DEVICEFEATURE_ENABLE_IF(textureCompressionASTC_LDR); + VK_DEVICEFEATURE_ENABLE_IF(textureCompressionBC); + //VK_DEVICEFEATURE_ENABLE_IF(occlusionQueryPrecise); + //VK_DEVICEFEATURE_ENABLE_IF(pipelineStatisticsQuery); + VK_DEVICEFEATURE_ENABLE_IF(vertexPipelineStoresAndAtomics); + VK_DEVICEFEATURE_ENABLE_IF(fragmentStoresAndAtomics); + VK_DEVICEFEATURE_ENABLE_IF(shaderTessellationAndGeometryPointSize); + VK_DEVICEFEATURE_ENABLE_IF(shaderImageGatherExtended); + VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageExtendedFormats); + // Intel Arc doesn't support shaderStorageImageMultisample (yet? could be a driver thing), so it's + // better for Validation to scream at us if we use it. Furthermore MSAA Storage is a huge red flag + // for performance. + //VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageMultisample); + VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageReadWithoutFormat); + VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageWriteWithoutFormat); + VK_DEVICEFEATURE_ENABLE_IF(shaderUniformBufferArrayDynamicIndexing); + VK_DEVICEFEATURE_ENABLE_IF(shaderSampledImageArrayDynamicIndexing); + VK_DEVICEFEATURE_ENABLE_IF(shaderStorageBufferArrayDynamicIndexing); + VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageArrayDynamicIndexing); + VK_DEVICEFEATURE_ENABLE_IF(shaderClipDistance); + VK_DEVICEFEATURE_ENABLE_IF(shaderCullDistance); + VK_DEVICEFEATURE_ENABLE_IF(shaderFloat64); + VK_DEVICEFEATURE_ENABLE_IF(shaderInt64); + VK_DEVICEFEATURE_ENABLE_IF(shaderInt16); + //VK_DEVICEFEATURE_ENABLE_IF(shaderResourceResidency); + VK_DEVICEFEATURE_ENABLE_IF(shaderResourceMinLod); + // We don't use sparse features and enabling them cause extra internal + // allocations inside the Vulkan driver we don't need. + //VK_DEVICEFEATURE_ENABLE_IF(sparseBinding); + //VK_DEVICEFEATURE_ENABLE_IF(sparseResidencyBuffer); + //VK_DEVICEFEATURE_ENABLE_IF(sparseResidencyImage2D); + //VK_DEVICEFEATURE_ENABLE_IF(sparseResidencyImage3D); + //VK_DEVICEFEATURE_ENABLE_IF(sparseResidency2Samples); + //VK_DEVICEFEATURE_ENABLE_IF(sparseResidency4Samples); + //VK_DEVICEFEATURE_ENABLE_IF(sparseResidency8Samples); + //VK_DEVICEFEATURE_ENABLE_IF(sparseResidency16Samples); + //VK_DEVICEFEATURE_ENABLE_IF(sparseResidencyAliased); + VK_DEVICEFEATURE_ENABLE_IF(variableMultisampleRate); + //VK_DEVICEFEATURE_ENABLE_IF(inheritedQueries); #define GET_INSTANCE_PROC_ADDR(inst, entrypoint) \ { \ fp##entrypoint = (PFN_vk##entrypoint)vkGetInstanceProcAddr(inst, "vk" #entrypoint); \ - ERR_FAIL_COND_V_MSG(fp##entrypoint == nullptr, ERR_CANT_CREATE, \ + ERR_FAIL_NULL_V_MSG(fp##entrypoint, ERR_CANT_CREATE, \ "vkGetInstanceProcAddr failed to find vk" #entrypoint); \ } @@ -1605,7 +1689,7 @@ Error VulkanContext::_initialize_queues(VkSurfaceKHR p_surface) { if (!g_gdpa) \ g_gdpa = (PFN_vkGetDeviceProcAddr)vkGetInstanceProcAddr(inst, "vkGetDeviceProcAddr"); \ fp##entrypoint = (PFN_vk##entrypoint)g_gdpa(dev, "vk" #entrypoint); \ - ERR_FAIL_COND_V_MSG(fp##entrypoint == nullptr, ERR_CANT_CREATE, \ + ERR_FAIL_NULL_V_MSG(fp##entrypoint, ERR_CANT_CREATE, \ "vkGetDeviceProcAddr failed to find vk" #entrypoint); \ } diff --git a/drivers/windows/dir_access_windows.cpp b/drivers/windows/dir_access_windows.cpp index 26b8881c39..8bf83823a0 100644 --- a/drivers/windows/dir_access_windows.cpp +++ b/drivers/windows/dir_access_windows.cpp @@ -32,6 +32,7 @@ #include "dir_access_windows.h" +#include "core/config/project_settings.h" #include "core/os/memory.h" #include "core/string/print_string.h" @@ -40,6 +41,26 @@ #define WIN32_LEAN_AND_MEAN #include <windows.h> +typedef struct _NT_IO_STATUS_BLOCK { + union { + LONG Status; + PVOID Pointer; + } DUMMY; + ULONG_PTR Information; +} NT_IO_STATUS_BLOCK; + +typedef struct _NT_FILE_CASE_SENSITIVE_INFO { + ULONG Flags; +} NT_FILE_CASE_SENSITIVE_INFO; + +typedef enum _NT_FILE_INFORMATION_CLASS { + FileCaseSensitiveInformation = 71, +} NT_FILE_INFORMATION_CLASS; + +#define NT_FILE_CS_FLAG_CASE_SENSITIVE_DIR 0x00000001 + +extern "C" NTSYSAPI LONG NTAPI NtQueryInformationFile(HANDLE FileHandle, NT_IO_STATUS_BLOCK *IoStatusBlock, PVOID FileInformation, ULONG Length, NT_FILE_INFORMATION_CLASS FileInformationClass); + struct DirAccessWindowsPrivate { HANDLE h; // handle for FindFirstFile. WIN32_FIND_DATA f; @@ -340,6 +361,33 @@ String DirAccessWindows::get_filesystem_type() const { ERR_FAIL_V(""); } +bool DirAccessWindows::is_case_sensitive(const String &p_path) const { + String f = p_path; + if (!f.is_absolute_path()) { + f = get_current_dir().path_join(f); + } + f = fix_path(f); + + HANDLE h_file = ::CreateFileW((LPCWSTR)(f.utf16().get_data()), 0, + FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, + nullptr, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, nullptr); + + if (h_file == INVALID_HANDLE_VALUE) { + return false; + } + + NT_IO_STATUS_BLOCK io_status_block; + NT_FILE_CASE_SENSITIVE_INFO file_info; + LONG out = NtQueryInformationFile(h_file, &io_status_block, &file_info, sizeof(NT_FILE_CASE_SENSITIVE_INFO), FileCaseSensitiveInformation); + ::CloseHandle(h_file); + + if (out >= 0) { + return file_info.Flags & NT_FILE_CS_FLAG_CASE_SENSITIVE_DIR; + } else { + return false; + } +} + DirAccessWindows::DirAccessWindows() { p = memnew(DirAccessWindowsPrivate); p->h = INVALID_HANDLE_VALUE; diff --git a/drivers/windows/dir_access_windows.h b/drivers/windows/dir_access_windows.h index 1e55917756..1dcab84c9d 100644 --- a/drivers/windows/dir_access_windows.h +++ b/drivers/windows/dir_access_windows.h @@ -84,6 +84,7 @@ public: uint64_t get_space_left() override; virtual String get_filesystem_type() const override; + virtual bool is_case_sensitive(const String &p_path) const override; DirAccessWindows(); ~DirAccessWindows(); |